def __getitem__(self, index):
    """Return one sample, trimmed to ``self.trim_height`` / ``self.trim_width``.

    Training mode returns
    ``(padding_data, im_info, gt_boxes_padding, num_boxes, rois_padding)``
    where the image is permuted to channel-first, ground-truth boxes are
    zero-padded to ``self.max_num_box`` rows and rois to 2000 rows.

    Evaluation mode returns ``(data, im_info, gt_boxes, num_boxes, rois)``
    with a dummy ``gt_boxes`` tensor and ``num_boxes == 0``.
    """

    def _crop_start(box_min, box_max, trim, extent):
        # Choose the crop offset along one axis so that a window of length
        # `trim` overlaps the span [box_min, box_max] covered by the boxes.
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if (box_region - trim) < 0:
            # Boxes fit inside the window: any offset between these bounds
            # keeps the whole span visible.
            start_min = max(box_max - trim, 0)
            start_max = min(box_min, extent - trim)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        # Boxes are larger than the window: start somewhere in the first
        # half of the overshoot.
        start_add = int((box_region - trim) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    minibatch_db = [self._roidb[index]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    rois = torch.from_numpy(blobs['rois'])
    # data is indexed as (1, H, W, C) below (see the permute/view calls).
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes, rois

    # Randomly shuffle the bounding boxes.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    # NOTE: a former third branch raising ValueError when both width and
    # height exceed the trim size could never execute (the height test
    # below already matches that case), so it has been removed.
    if data_height > self.trim_height:
        # Image is too tall: crop along y.
        trim_size = self.trim_height
        min_y = int(torch.min(gt_boxes[:, 1]))
        max_y = int(torch.max(gt_boxes[:, 3]))
        y_s = _crop_start(min_y, max_y, trim_size, data_height)
        data = data[:, y_s:(y_s + trim_size), :, :]
        # Shift, then clamp, the y coordinates of gt boxes and rois.
        gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
        gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
        gt_boxes[:, 1].clamp_(0, trim_size - 1)
        gt_boxes[:, 3].clamp_(0, trim_size - 1)
        rois[:, 2] = rois[:, 2] - float(y_s)
        rois[:, 4] = rois[:, 4] - float(y_s)
        rois[:, 2].clamp_(0, trim_size - 1)
        rois[:, 4].clamp_(0, trim_size - 1)
    elif data_width > self.trim_width:
        # Image is too wide: crop along x.
        trim_size = self.trim_width
        min_x = int(torch.min(gt_boxes[:, 0]))
        max_x = int(torch.max(gt_boxes[:, 2]))
        x_s = _crop_start(min_x, max_x, trim_size, data_width)
        data = data[:, :, x_s:(x_s + trim_size), :]
        # Shift, then clamp, the x coordinates of gt boxes and rois.
        gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
        gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
        gt_boxes[:, 0].clamp_(0, trim_size - 1)
        gt_boxes[:, 2].clamp_(0, trim_size - 1)
        rois[:, 1] = rois[:, 1] - float(x_s)
        rois[:, 3] = rois[:, 3] - float(x_s)
        rois[:, 1].clamp_(0, trim_size - 1)
        rois[:, 3].clamp_(0, trim_size - 1)
    else:
        # Neither dimension exceeds its trim size.
        trim_size = min(data_width, data_height)

    # Final square trim (no zero buffer is needed: this is a plain slice).
    padding_data = data[0][:trim_size, :trim_size, :]
    gt_boxes[:, :4].clamp_(0, trim_size)
    rois[:, 1:5].clamp_(0, trim_size)
    im_info[0, 0] = trim_size
    im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)
    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Permute to channel-first for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    # Drop degenerate rois and zero-pad to a fixed row count.
    rois_not_keep = (rois[:, 1] == rois[:, 3]) | (rois[:, 2] == rois[:, 4])
    rois_keep = torch.nonzero(rois_not_keep == 0).view(-1)
    rois = rois[rois_keep]
    max_num_rois = 2000
    num_rois = min(rois.size(0), max_num_rois)
    # Size the buffer from rois itself rather than from gt_boxes so the
    # assignment below cannot fail on a column-count mismatch.
    rois_padding = torch.FloatTensor(max_num_rois, rois.size(1)).zero_()
    rois_padding[:num_rois, :] = rois[:num_rois]

    return padding_data, im_info, gt_boxes_padding, num_boxes, rois_padding
def __getitem__(self, index):
    """Return one sample; in training, crop and pad it to the batch ratio.

    Training mode returns
    ``(padding_data, im_info, gt_boxes_padding, num_boxes)``: the image is
    optionally cropped (when the roidb entry has ``need_crop`` set), padded
    to the aspect ratio in ``self.ratio_list_batch[index]``, and permuted
    to channel-first; boxes are zero-padded to ``self.max_num_box`` rows.

    Evaluation mode returns ``(data, im_info, gt_boxes, num_boxes)`` with a
    dummy ``gt_boxes`` tensor and ``num_boxes == 0``.
    """

    def _crop_start(box_min, box_max, trim, extent):
        # Choose the crop offset along one axis so that a window of length
        # `trim` overlaps the span [box_min, box_max] covered by the boxes.
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if (box_region - trim) < 0:
            start_min = max(box_max - trim, 0)
            start_max = min(box_min, extent - trim)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        start_add = int((box_region - trim) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # data is indexed as (1, H, W, C) below (see the permute/view calls).
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # Randomly shuffle the bounding boxes.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ratio = self.ratio_list_batch[index]

    # Crop to the target size when the roidb entry asks for it.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # Width << height: crop along y.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _crop_start(min_y, max_y, trim_size, data_height)
            data = data[:, y_s:(y_s + trim_size), :, :]
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # Width >> height: crop along x.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _crop_start(min_x, max_x, trim_size, data_width)
            data = data[:, :, x_s:(x_s + trim_size), :]
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Pad the (possibly cropped) image to the batch aspect ratio.  The copy
    # uses the image's actual size: data_height/data_width still hold the
    # pre-crop size and would mis-shape the slice whenever the cropped
    # extent (floor) differs from the padded extent (ceil).
    image = data[0]
    if ratio < 1:
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:image.size(0), :, :] = image
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :image.size(1), :] = image
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: a plain slice, no zero buffer required.
        trim_size = min(data_height, data_width)
        padding_data = image[:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)
    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Permute to channel-first for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Return one image together with a stack of query sketches.

    Ground-truth boxes are first restricted to classes in ``self.list_ind``.

    Training mode picks one query class present in the image (weighted by
    ``self.show_time`` when several are present), loads 4 sketches of that
    class, relabels boxes to 1 (query class) / 0 (other), then crops/pads
    the image to the batch ratio.  Returns
    ``(padding_data, query, im_info, gt_boxes_padding, num_boxes)``.

    Evaluation mode uses the class in ``self.cat_list[index]``, loads up to
    20 sketches in an order seeded by the image id, and returns
    ``(data, query, im_info, gt_boxes, choice)``.
    """

    def _crop_start(box_min, box_max, trim, extent):
        # Choose the crop offset along one axis so that a window of length
        # `trim` overlaps the span [box_min, box_max] covered by the boxes.
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if (box_region - trim) < 0:
            start_min = max(box_max - trim, 0)
            start_max = min(box_min, extent - trim)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        start_add = int((box_region - trim) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    def _load_sketch(path):
        # Load one pickled sketch and return it as a 3-channel array scaled
        # by 1/255.
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only point cat2sketch at trusted files.
        with open(path, 'rb') as handle:
            record = pickle.load(handle)
        key = list(record.keys())[0]
        raster = convert_to_np_raw(record[key])
        return np.stack((raster, raster, raster), axis=0) / 255.0

    index_ratio = int(self.ratio_index[index])

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)

    # Keep only boxes whose class id (last column) is in list_ind.
    blobs['gt_boxes'] = np.array(
        [x for x in blobs['gt_boxes'] if x[-1] in self.list_ind])

    if self.training:
        # Randomly choose the query category among those in the image.
        try:
            catgory = blobs['gt_boxes'][:, -1]
        except IndexError:
            # No box survived the filter, so the array is not 2-D.  Show
            # the offending value and fail loudly instead of exiting the
            # process with a success status.
            print(blobs['gt_boxes'])
            raise

        cand = np.unique(catgory)
        if len(cand) == 1:
            choice = cand[0]
        else:
            p = np.array([self.show_time[i] for i in cand], dtype=np.float64)
            p /= p.sum()
            choice = np.random.choice(cand, 1, p=p)[0]

        cla = self.class2cat[int(choice)]
        sketch_paths = random.choices(self.cat2sketch[cla], k=4)
        query = np.stack([_load_sketch(path) for path in sketch_paths],
                         axis=0)

        # Relabel: 1 for the query class, 0 for everything else.
        blobs['gt_boxes'][:, -1] = np.where(
            blobs['gt_boxes'][:, -1] == choice, 1, 0)
    else:
        catgory = self.cat_list[index]
        # Seed with the image id so the sketch order is reproducible.
        random.seed(minibatch_db[0]['img_id'])
        cla = self.class2cat[int(catgory)]
        # Shuffle a copy: shuffling self.cat2sketch[cla] in place would make
        # the result depend on every earlier call despite the seed.
        sketch_paths = list(self.cat2sketch[cla])
        random.shuffle(sketch_paths)
        query = np.stack([_load_sketch(path) for path in sketch_paths[0:20]])

    data = torch.from_numpy(blobs['data'])
    query = torch.from_numpy(query).contiguous()
    im_info = torch.from_numpy(blobs['im_info'])
    # data is indexed as (1, H, W, C) below (see the permute/view calls).
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        choice = self.cat_list[index]
        return data, query, im_info, gt_boxes, choice

    # Randomly shuffle the bounding boxes.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ratio = self.ratio_list_batch[index]

    # Crop to the target size when the roidb entry asks for it.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # Width << height: crop along y.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _crop_start(min_y, max_y, trim_size, data_height)
            data = data[:, y_s:(y_s + trim_size), :, :]
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # Width >> height: crop along x.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _crop_start(min_x, max_x, trim_size, data_width)
            data = data[:, :, x_s:(x_s + trim_size), :]
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Pad the (possibly cropped) image to the batch aspect ratio, copying
    # by the image's actual size rather than the stale pre-crop size.
    image = data[0]
    if ratio < 1:
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:image.size(0), :, :] = image
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :image.size(1), :] = image
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: a plain slice, no zero buffer required.
        trim_size = min(data_height, data_width)
        padding_data = image[:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)
    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Permute to channel-first for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, query, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Return one sample with attribute and relation columns appended to boxes.

    Each ground-truth box row is extended with 16 zero attribute columns
    and one relation column per box (filled from ``blobs['gt_relations']``),
    so relation columns start at column 21 when boxes have 5 columns.

    Training mode crops (from the origin) and pads the image to the batch
    ratio and returns
    ``(padding_data, im_info, gt_boxes_padding, num_boxes)`` where the box
    buffer has shape ``(max_num_box, max_num_box + 21)``.

    Evaluation mode returns ``(data, im_info, gt_boxes, num_boxes)``.

    Raises:
        ValueError: if the computed crop size is not positive.
    """
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])

    # Substitute placeholder rows when the image has no boxes / relations so
    # the matrix construction below still works.
    has_no_boxes = blobs['gt_boxes'].size == 0
    if has_no_boxes:
        gt_boxes = torch.FloatTensor([0, 0, 10, 10, 0]).view(1, -1)
    else:
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    if blobs['gt_relations'].size == 0:
        gt_relations = torch.LongTensor([0, 0, 0]).view(1, -1)
    else:
        gt_relations = torch.from_numpy(blobs['gt_relations']).long()

    # Append (empty) attribute columns to gt_boxes.
    gt_att_mat = gt_boxes.new(gt_boxes.size(0), 16).zero_()
    gt_boxes = torch.cat((gt_boxes, gt_att_mat), 1)

    # Append the relation matrix: entry [i, j] holds the relation label for
    # the pair given by columns 0 and 1 of gt_relations.
    gt_rels_mat = gt_boxes.new(gt_boxes.size(0), gt_boxes.size(0)).zero_()
    gt_rels_mat[gt_relations[:, 0],
                gt_relations[:, 1]] = gt_relations[:, 2].float()
    gt_boxes = torch.cat((gt_boxes, gt_rels_mat), 1)

    im_info = torch.from_numpy(blobs['im_info'])
    # data is indexed as (1, H, W, C) below (see the permute/view calls).
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        if self.normalize:
            data = data / 255.0
            data = self.normalize(data)
        if has_no_boxes:
            num_boxes = 0
        else:
            num_boxes = min(gt_boxes.size(0), self.max_num_box)
        return data, im_info, gt_boxes, num_boxes

    ratio = self.ratio_list_batch[index]

    # Crop to the target size when required.  The crop window is always
    # anchored at the origin, so box coordinates only need clamping.
    if self.need_crop[index_ratio] > 0:
        if ratio < 1:
            # Width << height: crop along y.
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            if trim_size <= 0:
                # Fail with a clear error instead of dropping into a
                # debugger inside the data-loading path.
                raise ValueError(
                    "non-positive crop height {} for sample {}".format(
                        trim_size, index_ratio))
            data = data[:, 0:trim_size, :, :]
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # Width >> height: crop along x.
            trim_size = int(np.floor(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            if trim_size <= 0:
                raise ValueError(
                    "non-positive crop width {} for sample {}".format(
                        trim_size, index_ratio))
            data = data[:, :, 0:trim_size, :]
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Pad the (possibly cropped) image to the batch aspect ratio, copying by
    # the image's actual post-crop size.
    image = data[0]
    if ratio < 1:
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:image.size(0), :, :] = image
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :image.size(1), :] = image
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: a plain slice, no zero buffer required.
        trim_size = min(data_height, data_width)
        padding_data = image[:trim_size, :trim_size, :]
        # Clamp only the four coordinate columns; clamping the whole tensor
        # would also touch the label, attribute and relation columns.
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Truncate to max_num_box rows (and the matching relation columns).
    if gt_boxes.size(0) > self.max_num_box:
        if not cfg.HAS_RELATIONS:
            gt_boxes = gt_boxes[:self.max_num_box]
        else:
            gt_boxes = gt_boxes[:self.max_num_box, :(self.max_num_box + 21)]

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)
    gt_boxes_padding = torch.FloatTensor(
        self.max_num_box, self.max_num_box + 21).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        if cfg.HAS_RELATIONS:
            # Keep the 21 leading columns plus the relation columns that
            # belong to surviving boxes.
            gt_boxes = gt_boxes[:, torch.cat(
                (torch.arange(0, 21).long(), keep + 21), 0)]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :gt_boxes.size(1)] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Permute to channel-first for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    if self.normalize:
        padding_data = padding_data / 255.0
        padding_data = self.normalize(padding_data)

    return padding_data, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Return one sample with boxes, grasps and a relation matrix.

    Training mode shuffles boxes and grasps, optionally crops/pads the image
    (only when ``self.batch_size > 1``), post-processes the annotations and
    returns ``(data, im_info, gt_boxes, gt_grasps, num_boxes, num_grasps,
    rel_mat, gt_grasp_inds)``.

    Evaluation mode returns the same tuple layout with dummy annotation
    tensors and zero counts.
    """
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = self._roidb[index_ratio]
    blobs = get_minibatch_allinone(minibatch_db)
    blobs = self._imagePreprocess(blobs)

    data = torch.from_numpy(blobs['data'].copy())
    im_info = torch.from_numpy(blobs['im_info'])

    if not self.training:
        data = data.permute(2, 0, 1).contiguous()
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        gt_grasps = torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1])
        gt_grasp_inds = torch.LongTensor([0])
        num_boxes = 0
        num_grasps = 0
        rel_mat = torch.FloatTensor([0])
        return (data, im_info, gt_boxes, gt_grasps, num_boxes, num_grasps,
                rel_mat, gt_grasp_inds)

    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    gt_grasps = torch.from_numpy(blobs['gt_grasps'])
    gt_grasp_inds = torch.from_numpy(blobs['gt_grasp_inds'])

    # Shuffle boxes.  np.random.shuffle needs a mutable sequence; a bare
    # range object is immutable on Python 3 and makes it raise TypeError.
    shuffle_inds_b = list(range(blobs['gt_boxes'].shape[0]))
    np.random.shuffle(shuffle_inds_b)
    shuffle_inds_b = torch.LongTensor(shuffle_inds_b)
    gt_boxes = gt_boxes[shuffle_inds_b]
    gt_grasp_inds = self._graspIndsPostProcess(
        gt_grasp_inds, shuffle_inds_b.data.numpy(), blobs['node_inds'])

    # Shuffle grasps (and their box indices) with a second permutation.
    shuffle_inds_g = list(range(blobs['gt_grasps'].shape[0]))
    np.random.shuffle(shuffle_inds_g)
    shuffle_inds_g = torch.LongTensor(shuffle_inds_g)
    gt_grasps = gt_grasps[shuffle_inds_g]
    gt_grasp_inds = gt_grasp_inds[shuffle_inds_g]

    # With batch_size > 1 all images in a batch must share one size.
    if self.batch_size > 1:
        ratio = self.ratio_list_batch[index]
        # Crop origin; stays (0, 0) unless the image is actually cropped.
        coord_s = (0, 0)
        if self._roidb[index_ratio]['need_crop']:
            # Cropping takes both gt_boxes and gt_grasps into account.
            data, coord_s = self._cropImage(
                data, torch.cat((gt_grasps, gt_boxes), dim=-1), ratio)
        # Pad the image according to the batch ratio.
        data, im_info = self._paddingImage(data, im_info, ratio)
        # Adjust boxes and grasps to the cropped image.
        gt_boxes = self._cropBox(data, coord_s, gt_boxes)
        gt_grasps, _, gt_grasp_inds = self._cropGrasp(
            data, coord_s, gt_grasps, gt_grasp_inds)

    gt_boxes, keep = self._boxPostProcess(gt_boxes)
    gt_grasps, num_grasps, gt_grasp_inds = self._graspPostProcess(
        gt_grasps, gt_grasp_inds)
    shuffle_inds_b = shuffle_inds_b[keep]
    rel_mat = self._genRelMat(shuffle_inds_b, blobs['node_inds'],
                              blobs['child_lists'], blobs['parent_lists'])

    # Permute to channel-first for downstream processing.
    data = data.permute(2, 0, 1).contiguous()
    assert data.size(1) == im_info[0] and data.size(2) == im_info[1]

    return (data, im_info, gt_boxes, gt_grasps, keep.size(0), num_grasps,
            rel_mat, gt_grasp_inds)
def __getitem__(self, index):
    """Fetch one sample with strong and weak ground-truth boxes.

    Training mode crops the image along y and/or x when it exceeds
    ``self.trim_height`` / ``self.trim_width``, copies it into a square
    zero buffer and returns ``(padding_data, im_info, gt_boxes_padding,
    num_boxes, wgt_boxes_padding, wnum_boxes, rois_padding, image_classes)``.

    Evaluation mode returns
    ``(data, im_info, gt_boxes, num_boxes, rois, image_classes)``.
    """

    def _pick_offset(lo, hi, window, extent):
        # Offset of a crop window of length `window` chosen relative to the
        # span [lo, hi] covered by all available boxes.
        if lo == 0:
            return 0
        span = hi - lo + 1
        if (span - window) < 0:
            first = max(hi - window, 0)
            last = min(lo, extent - window)
            if first == last:
                return first
            return np.random.choice(range(first, last))
        slack = int((span - window) / 2)
        if slack == 0:
            return lo
        return np.random.choice(range(lo, lo + slack))

    def _shift_and_clamp(boxes, low_col, high_col, offset, window):
        # Translate one coordinate pair by the crop offset and clamp it to
        # the window, in place.
        boxes[:, low_col] = boxes[:, low_col] - float(offset)
        boxes[:, high_col] = boxes[:, high_col] - float(offset)
        boxes[:, low_col].clamp_(0, window - 1)
        boxes[:, high_col].clamp_(0, window - 1)

    def _filter_and_pad(boxes, count, side):
        # Clamp coordinates, drop zero-area boxes and zero-pad to
        # max_num_box rows; returns (padded_boxes, kept_count).
        if not count > 0:
            return torch.FloatTensor(self.max_num_box, 5).zero_(), 0
        boxes[:, :4].clamp_(0, side)
        degenerate = (boxes[:, 0] == boxes[:, 2]) | (
            boxes[:, 1] == boxes[:, 3])
        alive = torch.nonzero(degenerate == 0).view(-1)
        padded = torch.FloatTensor(self.max_num_box, boxes.size(1)).zero_()
        if alive.numel() == 0:
            return padded, 0
        boxes = boxes[alive]
        kept = min(boxes.size(0), self.max_num_box)
        padded[:kept, :] = boxes[:kept]
        return padded, kept

    self.trim_size = min(self.trim_height, self.trim_width)

    blobs = get_minibatch([self._roidb[index]], self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    rois = torch.from_numpy(blobs['rois'])
    image_classes = torch.from_numpy(blobs['image_classes'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes, rois, image_classes

    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    num_boxes = gt_boxes.size(0)
    wgt_boxes = torch.from_numpy(blobs['weak_gt_boxes'])
    wnum_boxes = wgt_boxes.size(0)
    # Strong and weak boxes together decide where the crop window goes.
    all_boxes = torch.from_numpy(
        np.vstack((blobs['gt_boxes'], blobs['weak_gt_boxes'])))

    if data_height > self.trim_height:
        # Too tall: crop along y.
        window = self.trim_height
        lowest = int(torch.min(all_boxes[:, 1]))
        highest = int(torch.max(all_boxes[:, 3]))
        top = _pick_offset(lowest, highest, window, data_height)
        data = data[:, top:(top + window), :, :]
        if num_boxes > 0:
            _shift_and_clamp(gt_boxes, 1, 3, top, window)
        if wnum_boxes > 0:
            _shift_and_clamp(wgt_boxes, 1, 3, top, window)
        _shift_and_clamp(rois, 2, 4, top, window)

    if data_width > self.trim_width:
        # Too wide: crop along x.
        window = self.trim_width
        lowest = int(torch.min(all_boxes[:, 0]))
        highest = int(torch.max(all_boxes[:, 2]))
        left = _pick_offset(lowest, highest, window, data_width)
        data = data[:, :, left:(left + window), :]
        if num_boxes > 0:
            _shift_and_clamp(gt_boxes, 0, 2, left, window)
        if wnum_boxes > 0:
            _shift_and_clamp(wgt_boxes, 0, 2, left, window)
        _shift_and_clamp(rois, 1, 3, left, window)

    # Copy the image into a square zero-filled buffer.
    side = min(self.trim_width, self.trim_height)
    padding_data = torch.FloatTensor(side, side, 3).zero_()
    real_height = min(side, data_height)
    real_width = min(side, data_width)
    padding_data[:real_height, :real_width, :] = \
        data[0][:real_height, :real_width, :]

    rois[:, 1:5].clamp_(0, side)
    im_info[0, 0] = side
    im_info[0, 1] = side

    # Validate strong boxes, then weak boxes.
    gt_boxes_padding, num_boxes = _filter_and_pad(gt_boxes, num_boxes, side)
    wgt_boxes_padding, wnum_boxes = _filter_and_pad(
        wgt_boxes, wnum_boxes, side)

    # Channel-first layout for downstream processing.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    # Drop zero-area rois and zero-pad to a fixed row count.
    rois_degenerate = (rois[:, 1] == rois[:, 3]) | (rois[:, 2] == rois[:, 4])
    rois = rois[torch.nonzero(rois_degenerate == 0).view(-1)]
    max_num_rois = 2000
    num_rois = min(rois.size(0), max_num_rois)
    rois_padding = torch.FloatTensor(max_num_rois, 5).zero_()
    rois_padding[:num_rois, :] = rois[:num_rois]

    return (padding_data, im_info, gt_boxes_padding, num_boxes,
            wgt_boxes_padding, wnum_boxes, rois_padding, image_classes)
# Ensemble inference: load every model, average the per-model class
# probabilities for each test item and record the winning class.
answer = []

# NOTE(security): torch.load unpickles the file; only load trusted models.
for model_file in model_file_list:
    cnn = torch.load(model_file, map_location=device)
    cnn.eval()  # Change model to 'eval' mode.
    nets.append(cnn)

with torch.set_grad_enabled(False):
    i = 0
    total_batches = len(test_generator)
    tick = time.time()
    for data, hash in test_generator:
        combined_classes = torch.zeros(6, device=device)
        # The datagen generates batches of 1, but the dataloader actually
        # returns data in batches of variable length, so the dims are
        # permuted once to get a proper tensor for every net.
        batch = data.permute((1, 0, 2)).to(device)
        for net in nets:
            outputs = net(batch)
            classes = torch.softmax(outputs, 1).mean(0)
            combined_classes += classes
        winner = combined_classes.argmax().item()
        answer.append({'hash': hash[0], 'class': class_list[winner]})
        i += 1
        if i % 100 == 0:
            tock = time.time()
            elapsed = tock - tick
            # tick is taken once before the loop, so `elapsed` covers all
            # i batches so far: remaining time = remaining batches times
            # the average time per batch.
            time_to_go = (total_batches - i) * elapsed / i
            print('Batch {:d} / {:d}, {:.1f} sec, to go: {:.0f}'.format(
                i, total_batches, elapsed, time_to_go))
def __getitem__(self, index):
    """Load, augment and rescale one image together with its boxes.

    The image is read from ``self._image_set[index]['img_full_path']``,
    optionally flipped (training only) and rotated by a multiple of 90
    degrees (whenever ``self.rotation`` is set), mean-subtracted, and resized
    so that its shorter side matches the chosen target size.

    Returns:
        tuple: ``(data, gt_boxes, box_categories, data_height, data_width,
        im_scale, raw_img, image_id)`` where ``data`` is the channel-first
        float tensor of the resized image, ``gt_boxes`` holds the first four
        box columns scaled by ``im_scale``, ``box_categories`` are the
        category ids mapped through ``self._id_to_index``, the two sizes are
        those of the resized image, and ``raw_img`` is the un-normalised
        (but flipped/rotated) image in its original channel order.
    """
    here = self._image_set[index]
    im = imread(here['img_full_path'])
    if im.ndim == 2:
        # Grayscale image: replicate the single channel three times.
        im = im[:, :, np.newaxis]
        im = np.concatenate((im, im, im), axis=2)
    raw_img = im.copy()

    # rgb -> bgr
    im = im[:, :, ::-1]
    gt_boxes = here['object_set'].copy()

    # random flip
    if self.training and np.random.rand() > 0.5:
        im = im[:, ::-1, :]
        raw_img = raw_img[:, ::-1, :].copy()
        width = im.shape[1]
        flipped_gt_boxes = gt_boxes.copy()
        flipped_gt_boxes[:, 0] = width - gt_boxes[:, 2]
        flipped_gt_boxes[:, 2] = width - gt_boxes[:, 0]
        gt_boxes = flipped_gt_boxes

    if self.rotation:
        # Rotation is expressed on boxes in centre form; after the conversion
        # columns 0/1 and 2/3 are swapped or mirrored as pairs.
        gt_boxes = to_center_form(gt_boxes)
        rotated_gt_boxes = gt_boxes.copy()
        h, w = im.shape[0], im.shape[1]
        angle = np.random.choice([0, 90, 180, 270])
        if angle == 90:
            im = im.transpose([1, 0, 2])[::-1, :, :].copy()
            raw_img = raw_img.transpose([1, 0, 2])[::-1, :, :].copy()
            rotated_gt_boxes[:, 0] = gt_boxes[:, 1]
            rotated_gt_boxes[:, 1] = w - gt_boxes[:, 0]
            rotated_gt_boxes[:, 2] = gt_boxes[:, 3]
            rotated_gt_boxes[:, 3] = gt_boxes[:, 2]
        elif angle == 180:
            im = im[::-1, ::-1, :].copy()
            raw_img = raw_img[::-1, ::-1, :].copy()
            rotated_gt_boxes[:, 0] = w - gt_boxes[:, 0]
            rotated_gt_boxes[:, 1] = h - gt_boxes[:, 1]
        elif angle == 270:
            im = im.transpose([1, 0, 2])[:, ::-1, :].copy()
            raw_img = raw_img.transpose([1, 0, 2])[:, ::-1, :].copy()
            rotated_gt_boxes[:, 0] = h - gt_boxes[:, 1]
            rotated_gt_boxes[:, 1] = gt_boxes[:, 0]
            rotated_gt_boxes[:, 2] = gt_boxes[:, 3]
            rotated_gt_boxes[:, 3] = gt_boxes[:, 2]
        gt_boxes = to_point_form(rotated_gt_boxes)

    im = im.astype(np.float32, copy=False)
    if self.pd is not None:
        im = self.pd(im)
    # Per-channel mean subtraction.
    im -= np.array([[[102.9801, 115.9465, 122.7717]]])

    # Rescale so the shorter side equals the (possibly random) target size.
    im_size_min = np.min(im.shape[0:2])
    if self.multi_scale:
        target_size = np.random.choice([416, 500, 600, 720, 864])
    else:
        target_size = 600
    im_scale = target_size / float(im_size_min)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    data = torch.from_numpy(im)
    data_height, data_width = data.size(0), data.size(1)
    data = data.permute(2, 0, 1).contiguous()

    if self.training:
        np.random.shuffle(gt_boxes)

    # `np.long` was removed in NumPy 1.24; int64 is what torch expects for
    # label tensors anyway.
    box_categories = np.array(
        [self._id_to_index[cat] for cat in gt_boxes[:, 4].astype(np.int64)],
        dtype=np.int64)

    gt_boxes = gt_boxes[:, :4]
    gt_boxes *= im_scale  # in place, so the box dtype is preserved

    gt_boxes = torch.from_numpy(gt_boxes)
    box_categories = torch.from_numpy(box_categories)
    return (data, gt_boxes, box_categories, data_height, data_width,
            im_scale, raw_img, here['id'])
def run(modelcheckpoint, normalizeData, simfile):
    """Classify one simulation file with a checkpointed ``wresnet34x2`` model.

    Args:
        modelcheckpoint: Path to a checkpoint containing ``state_dict``,
            ``best_acc`` and ``epoch`` entries.
        normalizeData: Path to an HDF5 file with ``mean`` and ``std_dev``
            datasets used to normalise the input features.
        simfile: Path to the raw simulation file to classify.

    Returns:
        A list of 7 class probabilities, or ``None`` when the checkpoint
        file does not exist.
    """
    model = wresnet34x2().cpu()

    if not os.path.isfile(modelcheckpoint):
        print("=> No model checkpoint found. Exiting")
        return None

    print("=> Loading checkpoint '{}'".format(modelcheckpoint))
    # Always map storages to CPU regardless of where they were saved.
    checkpoint = torch.load(modelcheckpoint,
                            map_location=lambda storage, loc: storage)
    best_acc = checkpoint['best_acc']
    print("This model had an accuracy of %.2f on the validation set." %
          (best_acc, ))

    # Strip the 'module.' prefix from parameter names. Iterate over a
    # snapshot of the keys: mutating a dict while iterating its live key
    # view raises RuntimeError.
    state_dict = checkpoint['state_dict']
    for old_key in list(state_dict.keys()):
        state_dict[old_key.replace('module.', '')] = state_dict.pop(old_key)
    model.load_state_dict(state_dict)
    print("=> Loaded checkpoint '{}' (epoch {})".format(
        modelcheckpoint, checkpoint['epoch']))

    cudnn.benchmark = False

    # Load the Normalizer function
    with h5py.File(normalizeData, 'r') as h:
        mean = torch.FloatTensor(h['mean'][:]).permute(2, 0, 1)
        std_dev = torch.FloatTensor(h['std_dev'][:]).permute(2, 0, 1)
    normalize = transforms.Normalize(mean=mean, std=std_dev)

    # Load simulation data
    time_freq_resolution = (384, 512)
    with open(simfile, 'rb') as sim:
        aca = ibmseti.compamp.SimCompamp(sim.read())
    complex_data = aca.complex_data()
    complex_data = complex_data.reshape(time_freq_resolution[0],
                                        time_freq_resolution[1])
    complex_data = complex_data * np.hanning(complex_data.shape[1])
    cpfft = np.fft.fftshift(np.fft.fft(complex_data), 1)
    spectrogram = np.abs(cpfft)
    # Two feature channels: log power and arctan(imag / real).
    # NOTE(review): arctan of the ratio (not arctan2) is kept on purpose so
    # the features match whatever the checkpoint was trained on.
    features = np.stack(
        (np.log(spectrogram**2), np.arctan(cpfft.imag / cpfft.real)), -1)

    # create FloatTensor, permute to proper dimensional order, and normalize
    data = torch.FloatTensor(features).permute(2, 0, 1)
    data = normalize(data)

    # The model expects a 4D tensor
    data = data.contiguous().unsqueeze(0)

    model.eval()
    # `Variable(..., volatile=True)` no longer disables autograd; no_grad()
    # is the supported way to run inference without building a graph.
    with torch.no_grad():
        output = model(data)
        probs = torch.softmax(output, dim=1).view(7).tolist()
    return probs
def __getitem__(self, index):
    """Build one training/eval sample with proposals and image-level labels.

    Raw data comes from ``self.get_raw_data(index)``. The image is optionally
    warped (``self.warping``, 20% of the time), optionally rotated by a
    multiple of 90 degrees (training with ``self.rotation``), mean-subtracted
    and rescaled; boxes and proposals are scaled by the same factor.

    Returns:
        tuple: ``(data, gt_boxes, gt_categories, proposals, prop_scores,
        image_level_label, im_scale, raw_img, image_id, loader_index)``.
        ``data`` is the channel-first image tensor and ``image_level_label``
        is a length-80 vector with 1.0 at every category in
        ``gt_categories``.
    """
    (im, gt_boxes, gt_categories, proposals, prop_scores, image_id,
     loader_index) = self.get_raw_data(index)
    raw_img = im.copy()
    proposals, prop_scores = self.select_proposals(proposals, prop_scores)

    if self.warping and np.random.rand() > 0.8:
        src, dst = make_transform(im, gt_boxes)
        tform = PiecewiseAffineTransform()
        tform.estimate(src, dst)
        # NOTE(review): assuming `warp` is skimage.transform.warp. Without
        # preserve_range=True it converts integer images to floats in
        # [0, 1], which the ~100-valued mean subtraction below would wipe
        # out. Keep the value range and restore the input dtype instead.
        src_dtype = im.dtype
        im = warp(im, tform, output_shape=(im.shape[0], im.shape[1]),
                  preserve_range=True).astype(src_dtype, copy=False)
        raw_img = im.copy()

    # rgb -> bgr
    im = im[:, :, ::-1]

    if self.training and self.rotation:
        # Rotation is expressed on boxes in centre form; after the conversion
        # columns 0/1 and 2/3 are swapped or mirrored as pairs.
        gt_boxes = to_center_form(gt_boxes)
        rotated_gt_boxes = gt_boxes.copy()
        h, w = im.shape[0], im.shape[1]
        angle = np.random.choice([0, 90, 180, 270])
        if angle == 90:
            im = im.transpose([1, 0, 2])[::-1, :, :].copy()
            raw_img = raw_img.transpose([1, 0, 2])[::-1, :, :].copy()
            rotated_gt_boxes[:, 0] = gt_boxes[:, 1]
            rotated_gt_boxes[:, 1] = w - gt_boxes[:, 0]
            rotated_gt_boxes[:, 2] = gt_boxes[:, 3]
            rotated_gt_boxes[:, 3] = gt_boxes[:, 2]
        elif angle == 180:
            im = im[::-1, ::-1, :].copy()
            raw_img = raw_img[::-1, ::-1, :].copy()
            rotated_gt_boxes[:, 0] = w - gt_boxes[:, 0]
            rotated_gt_boxes[:, 1] = h - gt_boxes[:, 1]
        elif angle == 270:
            im = im.transpose([1, 0, 2])[:, ::-1, :].copy()
            raw_img = raw_img.transpose([1, 0, 2])[:, ::-1, :].copy()
            rotated_gt_boxes[:, 0] = h - gt_boxes[:, 1]
            rotated_gt_boxes[:, 1] = gt_boxes[:, 0]
            rotated_gt_boxes[:, 2] = gt_boxes[:, 3]
            rotated_gt_boxes[:, 3] = gt_boxes[:, 2]
        gt_boxes = to_point_form(rotated_gt_boxes)

    # cast to float type and mean subtraction
    im = im.astype(np.float32, copy=False)
    if self.pd is not None:
        im = self.pd(im)
        raw_img = self.pd(raw_img.astype(np.float32,
                                         copy=False)).astype(np.uint8)
    im -= np.array([[[102.9801, 115.9465, 122.7717]]])

    # image rescale
    im_size_min = np.min(im.shape[0:2])
    im_size_max = np.max(im.shape[0:2])
    if self.multi_scale:
        im_scale = np.random.choice([416, 500, 600, 720, 864
                                     ]) / float(im_size_min)
        # Cap the longer side at 1200 pixels.
        if im_size_max * im_scale > 1200:
            im_scale = 1200 / im_size_max
    else:
        im_scale = 600 / float(im_size_min)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    gt_boxes = gt_boxes * im_scale
    proposals = proposals * im_scale

    # to tensor
    data = torch.from_numpy(im).permute(2, 0, 1).contiguous()
    gt_boxes = torch.from_numpy(gt_boxes)
    proposals = torch.from_numpy(proposals)
    prop_scores = torch.from_numpy(prop_scores)
    gt_categories = torch.from_numpy(gt_categories)

    # Multi-hot image-level label over 80 categories.
    image_level_label = torch.zeros(80)
    image_level_label[gt_categories.long()] = 1.0

    return (data, gt_boxes, gt_categories, proposals, prop_scores,
            image_level_label, im_scale, raw_img, image_id, loader_index)
def __getitem__(self, index):
    """Return one sample, cropped and padded to its batch's aspect ratio.

    In training mode the image selected through ``self.ratio_index`` is
    cropped (when its roidb entry has ``need_crop`` set) and then padded or
    trimmed so that its shape matches ``self.ratio_list_batch[index]``.
    Degenerate boxes (zero width or height) are dropped and the remainder is
    written into a zero-padded ``(self.max_num_box, C)`` tensor.

    Returns:
        Training: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``
        with ``padding_data`` channel-first.
        Otherwise: ``(data, im_info, gt_boxes, num_boxes)`` where
        ``gt_boxes`` is the placeholder ``[1, 1, 1, 1, 1]`` and
        ``num_boxes`` is 0.
    """

    def _crop_start(box_lo, box_hi, trim, extent):
        """Pick the first row/column of a crop window of length `trim`.

        `box_lo`/`box_hi` bound all boxes along the cropped axis and
        `extent` is the image size along that axis.
        """
        if box_lo == 0:
            return 0
        box_region = box_hi - box_lo + 1
        if box_region < trim:
            # All boxes fit: choose a window that still contains them.
            lo = max(box_hi - trim, 0)
            hi = min(box_lo, extent - trim)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # Boxes span more than the window: start inside the box region.
        half_excess = int((box_region - trim) / 2)
        if half_excess == 0:
            return box_lo
        return np.random.choice(range(box_lo, box_lo + half_excess))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = self.get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # Crop along the height.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
            y_s = _crop_start(min_y, max_y, trim_size, data_height)
            data = data[:, y_s:(y_s + trim_size), :, :]
            # Shift the y coordinates into the crop and clip them to it.
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # Crop along the width.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)
            x_s = _crop_start(min_x, max_x, trim_size, data_width)
            data = data[:, :, x_s:(x_s + trim_size), :]
            # Shift the x coordinates into the crop and clip them to it.
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # `data` may have been cropped above, so copy using its current extent
    # rather than the pre-crop data_height / data_width.
    cur_height, cur_width = data.size(1), data.size(2)
    if ratio < 1:
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:cur_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :cur_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                     == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes, :]
    else:
        num_boxes = 0

    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)
    return padding_data, im_info, gt_boxes_padding, num_boxes
def train_ccblock(model_options):
    """Train the RCCA feature model jointly with its quantization module.

    Every batch updates the quantizer with its distortion loss; on epochs
    where ``epoch % neighbor_freq == 0`` the feature model is additionally
    updated with a neighbour-similarity loss. Running averages are written
    to the loss-record file and both models are checkpointed every
    ``model_options.save_freq`` epochs and once more at the end.

    Args:
        model_options: Configuration object. Attributes read here:
            ``subCenters``, ``model_save_path``, ``params_filename``,
            ``Qparams_filename``, ``reload_params``, ``records_save_path``,
            ``train_loss_filename``, ``default_dtype``, ``disp_freq`` and
            ``save_freq``.
    """
    # load datasets
    train_file_paths = [
        "/hhd12306-2/langruimin/ActivityNet1.3/resnet50_V2/conv5/video_{}.npy".format(i)
        for i in range(9654)
    ]
    videoset = VideoDataset(train_file_paths)
    print(len(videoset))

    # create model
    model = RCCAModule_3d(2048, 2)
    model_quan = Quantization(16, model_options.subCenters, 2048)

    params_path = os.path.join(model_options.model_save_path,
                               model_options.params_filename)
    params_path_Q = os.path.join(model_options.model_save_path,
                                 model_options.Qparams_filename)
    if model_options.reload_params:
        # NOTE(review): only the feature model is reloaded; the quantizer
        # always starts fresh -- confirm this is intended.
        print('Loading model params...')
        model.load_state_dict(torch.load(params_path))
        print('Done.')

    model = model.cuda()
    model_quan = model_quan.cuda()

    # optimizer
    optimizer = RAdam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
                      weight_decay=1e-4)
    optimizer2 = RAdam(model_quan.parameters(), lr=1e-2, betas=(0.9, 0.999),
                       weight_decay=1e-4)

    lr_C = ''
    lr_Q = ''

    # load the similarity matrix
    print("+++++++++loading similarity+++++++++")
    with open("/home/langruimin/BLSTM_pytorch/data/activitynet-v1.3/Sim_K1_10_K2_5activitynet_V2.pkl", "rb") as f:
        similarity = pkl.load(f)
    similarity = torch.ByteTensor(similarity.astype(np.uint8))
    print("++++++++++similarity loaded+++++++")

    batch_idx = 1
    error_ = 0.
    loss_ = 0.
    num = 0

    neighbor = True
    neighbor_freq = 2
    print("##########start train############")
    trainloader = torch.utils.data.DataLoader(videoset, batch_size=12,
                                              shuffle=True, num_workers=4,
                                              pin_memory=True)
    model.train()
    model_quan.train()

    # Most recent neighbour loss as a plain float. It is carried over into
    # epochs that skip the neighbour update, so the logged running average
    # keeps using the last computed value.
    last_neighbor_loss = 0.0

    loss_record_path = os.path.join(model_options.records_save_path,
                                    model_options.train_loss_filename)
    # The context manager guarantees the record file is flushed and closed
    # even if training is interrupted by an exception.
    with open(loss_record_path, 'w') as train_loss_rec:
        for l in range(60):
            if neighbor:
                # training
                for i, (data, index) in enumerate(trainloader):
                    data = data.to(model_options.default_dtype)
                    data = data.cuda()
                    data = data.permute(0, 2, 1, 3, 4)

                    output_ccblock_mean = torch.tanh(model(data))

                    # quantization block
                    (Qhard, Qsoft, SoftDistortion, HardDistortion,
                     JointCenter, error, _) = model_quan(output_ccblock_mean)
                    Q_loss = (0.1 * SoftDistortion + HardDistortion +
                              0.1 * JointCenter)
                    optimizer2.zero_grad()
                    # The graph is reused by the neighbour loss below.
                    Q_loss.backward(retain_graph=True)
                    optimizer2.step()

                    if l % neighbor_freq == 0:
                        # neighbor loss: squared difference between the
                        # normalised feature Gram matrix and the similarity
                        # sub-matrix of this batch
                        similarity_select = torch.index_select(
                            similarity, 0, index)
                        similarity_select = torch.index_select(
                            similarity_select, 1, index).float().cuda()
                        gram = torch.mm(
                            output_ccblock_mean,
                            output_ccblock_mean.transpose(0, 1))
                        gram = gram / output_ccblock_mean.shape[-1]
                        neighbor_loss = torch.sum(
                            (gram - similarity_select).pow(2))
                        optimizer.zero_grad()
                        neighbor_loss.backward()
                        optimizer.step()
                        last_neighbor_loss = neighbor_loss.item()

                    error_ += error.item()
                    loss_ += last_neighbor_loss
                    num += 1
                    if batch_idx % model_options.disp_freq == 0:
                        info = "epoch{0} Batch {1} loss:{2:.3f} distortion:{3:.3f} " \
                            .format(l, batch_idx, loss_ / num, error_ / num)
                        print(info)
                        train_loss_rec.write(info + '\n')
                    batch_idx += 1

                # Reset the running statistics for the next epoch.
                batch_idx = 0
                error_ = 0.
                loss_ = 0.
                num = 0

            if (l + 1) % model_options.save_freq == 0:
                print('epoch: ', l, 'New best model. Saving model ...')
                torch.save(model.state_dict(), params_path)
                torch.save(model_quan.state_dict(), params_path_Q)
                # Record the learning rate of the last parameter group.
                for param_group in optimizer.param_groups:
                    lr_C = param_group['lr']
                for param_group in optimizer2.param_groups:
                    lr_Q = param_group['lr']
                record_inf = "saved model at epoch {0} lr_C:{1} lr_Q:{2}".format(
                    l, lr_C, lr_Q)
                train_loss_rec.write(record_inf + '\n')
            print("##########epoch done##########")

    print('train done. Saving model ...')
    torch.save(model.state_dict(), params_path)
    torch.save(model_quan.state_dict(), params_path_Q)
    print("##########train done##########")
def __getitem__(self, index):
    """Return one image/query pair for query-conditioned detection.

    Ground-truth boxes are first restricted to the classes in
    ``self.list_ind``. In training mode one of the remaining classes is
    sampled (weighted by ``self.show_time``), the class column is rewritten
    to 1 for that class and 0 otherwise, a query image for the class is
    loaded, and the image is cropped/padded to the batch aspect ratio.

    Returns:
        Training: ``(padding_data, query, im_info, gt_boxes_padding,
        num_boxes)``.
        Otherwise: ``(data, query, im_info, gt_boxes, choice)`` with
        ``choice = self.cat_list[index]``.
    """

    def _crop_start(box_lo, box_hi, trim, extent):
        """Pick the first row/column of a crop window of length `trim`.

        `box_lo`/`box_hi` bound all boxes along the cropped axis and
        `extent` is the image size along that axis.
        """
        if box_lo == 0:
            return 0
        box_region = box_hi - box_lo + 1
        if box_region < trim:
            # All boxes fit: choose a window that still contains them.
            lo = max(box_hi - trim, 0)
            hi = min(box_lo, extent - trim)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # Boxes span more than the window: start inside the box region.
        half_excess = int((box_region - trim) / 2)
        if half_excess == 0:
            return box_lo
        return np.random.choice(range(box_lo, box_lo + half_excess))

    index_ratio = int(self.ratio_index[index])

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)

    # Keep only boxes whose class (last column) is in self.list_ind. The
    # reshape keeps the array two-dimensional even when nothing survives.
    all_boxes = np.asarray(blobs['gt_boxes'])
    kept = [row for row in all_boxes if row[-1] in self.list_ind]
    blobs['gt_boxes'] = np.array(kept, dtype=all_boxes.dtype).reshape(
        -1, all_boxes.shape[-1])

    if self.training:
        # Random choice of the query category among those present.
        catgory = blobs['gt_boxes'][:, -1]
        cand = np.unique(catgory).astype(np.uint8)
        if len(cand) == 1:
            choice = cand[0]
        else:
            # Sampling weights per candidate class; float dtype so the
            # normalisation also works for integer weights.
            p = np.array([self.show_time[i] for i in cand], dtype=np.float64)
            p /= p.sum()
            choice = np.random.choice(cand, 1, p=p)[0]

        # Binarise the class column: 1 for the chosen class, 0 otherwise.
        blobs['gt_boxes'][:, -1] = np.where(
            blobs['gt_boxes'][:, -1] == choice, 1, 0)

        # Get query image
        query = self.load_query(choice)
    else:
        query = self.load_query(index, minibatch_db[0]['img_id'])

    data = torch.from_numpy(blobs['data'])
    query = torch.from_numpy(query)
    query = query.permute(0, 3, 1, 2).contiguous().squeeze(0)
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        choice = self.cat_list[index]
        return data, query, im_info, gt_boxes, choice

    # we need to random shuffle the bounding box.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    # if the image need to crop, crop to the target size.
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # this means that data_width << data_height, we need to crop the
            # data_height
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
            y_s = _crop_start(min_y, max_y, trim_size, data_height)
            # crop the image
            data = data[:, y_s:(y_s + trim_size), :, :]
            # shift y coordinate of gt_boxes, then clip to the crop
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # this means that data_width >> data_height, we need to crop the
            # data_width
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)
            x_s = _crop_start(min_x, max_x, trim_size, data_width)
            # crop the image
            data = data[:, :, x_s:(x_s + trim_size), :]
            # shift x coordinate of gt_boxes, then clip to the crop
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # based on the ratio, padding the image. `data` may have been cropped
    # above, so copy using its current extent rather than the pre-crop
    # data_height / data_width.
    cur_height, cur_width = data.size(1), data.size(2)
    if ratio < 1:
        # this means that data_width < data_height
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:cur_height, :, :] = data[0]
        # update im_info
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # this means that data_width > data_height
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :cur_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # check the bounding box: drop boxes with zero width or height
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                     == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, query, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Return one sample, cropped and padded to its batch's aspect ratio.

    In training mode the image selected through ``self.ratio_index`` is
    cropped (when its roidb entry has ``need_crop`` set) and then padded or
    trimmed so that its shape matches ``self.ratio_list_batch[index]``.
    Degenerate boxes (zero width or height) are dropped and the remainder is
    written into a zero-padded ``(self.max_num_box, C)`` tensor.

    Returns:
        Training: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``
        with ``padding_data`` channel-first.
        Otherwise: ``(data, im_info, gt_boxes, num_boxes)`` where
        ``gt_boxes`` is the placeholder ``[1, 1, 1, 1, 1]`` and
        ``num_boxes`` is 0.
    """

    def _crop_start(box_lo, box_hi, trim, extent):
        """Pick the first row/column of a crop window of length `trim`.

        `box_lo`/`box_hi` bound all boxes along the cropped axis and
        `extent` is the image size along that axis.
        """
        if box_lo == 0:
            return 0
        box_region = box_hi - box_lo + 1
        if box_region < trim:
            # All boxes fit: choose a window that still contains them.
            lo = max(box_hi - trim, 0)
            hi = min(box_lo, extent - trim)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # Boxes span more than the window: start inside the box region.
        half_excess = int((box_region - trim) / 2)
        if half_excess == 0:
            return box_lo
        return np.random.choice(range(box_lo, box_lo + half_excess))

    if self.training:
        # The image is not picked by `index` directly: ratio_index holds the
        # image indices sorted by aspect ratio, so this selects the
        # index-th image in aspect-ratio order.
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get the anchor index for current sample index
    # here we set the anchor index to the last one
    # sample in this group
    minibatch_db = [self._roidb[index_ratio]]
    # Original author's notes on the roidb entry (not verified here):
    #   minibatch_db is a list holding a single dict with
    #   boxes.shape = (n, 4)       n = number of objects in the image
    #   gt_classes.shape = (n,)    e.g. array([12, 15]): first object is
    #                              class 12 (cat), second is class 15 (person)
    #   gt_overlaps.shape = (n, 21) values 0 or 1, default 0; [2, 20] = 1
    #                              means object 2 belongs to class 20
    #   flipped = False            (first 5011 entries False, last 5011 True)
    #   seg_areas.shape = (n,)     box area of each object
    #   max_classes = [12, 15]     class of each object
    #   max_overlaps = [1, 1, ...] one 1 per object
    #   need_crop = 0 or 1         1: aspect ratio too large or too small,
    #                              the image has to be cropped
    blobs = get_minibatch(minibatch_db, self._num_classes)
    # Original author's notes on `blobs` (not verified here):
    #   data     : resized image blob, noted as shape (1, W, H, 3); this
    #              method treats dim 1 as height and dim 2 as width
    #   gt_boxes : shape (n, 5) -> [x1, y1, x2, y2, class]
    #   im_info  : shape (1, 3) -> noted as (w, h, scale)
    #   img_id   : e.g. 00026
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # we need to random shuffle the bounding box.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    # if the image need to crop, crop to the target size.
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # this means that data_width << data_height, we need to crop the
            # data_height
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
            y_s = _crop_start(min_y, max_y, trim_size, data_height)
            # crop the image
            data = data[:, y_s:(y_s + trim_size), :, :]
            # shift y coordinate of gt_boxes, then clip to the crop
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # this means that data_width >> data_height, we need to crop the
            # data_width
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)
            x_s = _crop_start(min_x, max_x, trim_size, data_width)
            # crop the image
            data = data[:, :, x_s:(x_s + trim_size), :]
            # shift x coordinate of gt_boxes, then clip to the crop
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # based on the ratio, padding the image. `data` may have been cropped
    # above, so copy using its current extent rather than the pre-crop
    # data_height / data_width.
    cur_height, cur_width = data.size(1), data.size(2)
    if ratio < 1:
        # this means that data_width < data_height
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        padding_data[:cur_height, :, :] = data[0]
        # update im_info
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # this means that data_width > data_height
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        padding_data[:, :cur_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # check the bounding box: drop boxes with zero width or height
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (gt_boxes[:, 1]
                                                     == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.FloatTensor(self.max_num_box,
                                         gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    # Original author's note on the collated training batch (not verified
    # here): im_data is (b, 3, 512, 512), im_info is (b, 3), gt_boxes is
    # (b, 20, 5) -- four coordinates plus a class, with the first n rows
    # real boxes and the rest zeros -- and num_boxes holds n.
    return padding_data, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Fetch the sample at `index`.

    Returns ``(data, target)`` in training mode, where ``target`` is the
    boxes concatenated with their categories, and ``(data, image_id,
    ratio)`` otherwise. ``data`` is the normalised, channel-first image
    padded on the right/bottom to a multiple of ``self.stride``.
    """
    # Load image
    image_id = self.ids[index]
    if self.coco:
        image = self.coco.loadImgs(image_id)[0]['file_name']
    im = Image.open('{}/{}'.format(self.path, image)).convert("RGB")

    # Randomly sample scale for resize during training
    resize = self.resize
    if isinstance(resize, list):
        resize = random.randint(self.resize[0], self.resize[-1])

    shorter_side, longer_side = min(im.size), max(im.size)
    ratio = resize / shorter_side
    if ratio * longer_side > self.max_size:
        ratio = self.max_size / longer_side
    scaled_size = tuple(int(ratio * side) for side in im.size)
    im = im.resize(scaled_size, Image.BILINEAR)

    if self.training:
        # Get annotations (boxes are [x, y, w, h] per the unpacking below)
        boxes, categories = self._get_target(image_id)
        boxes *= ratio

        # Random rotation, if self.rotate_augment. The angle is always drawn
        # so the random stream does not depend on the flag.
        random_angle = 90 * random.randint(0, 3)
        if self.rotate_augment and random_angle != 0:
            # rotate by random_angle degrees.
            im = im.rotate(random_angle)
            x = boxes[:, 0].clone()
            y = boxes[:, 1].clone()
            w = boxes[:, 2].clone()
            h = boxes[:, 3].clone()
            if random_angle == 90:
                boxes[:, 0] = y
                boxes[:, 1] = im.size[1] - x - w
                boxes[:, 2] = h
                boxes[:, 3] = w
            elif random_angle == 180:
                boxes[:, 0] = im.size[0] - x - w
                boxes[:, 1] = im.size[1] - y - h
            elif random_angle == 270:
                boxes[:, 0] = im.size[0] - y - h
                boxes[:, 1] = x
                boxes[:, 2] = h
                boxes[:, 3] = w

        # Random horizontal flip
        if random.randint(0, 1):
            im = im.transpose(Image.FLIP_LEFT_RIGHT)
            boxes[:, 0] = im.size[0] - boxes[:, 0] - boxes[:, 2]

        # Apply image brightness, contrast etc augmentation
        if self.augment_brightness:
            factor = max(0, random.normalvariate(1, self.augment_brightness))
            im = adjust_brightness(im, factor)

        if self.augment_contrast:
            factor = max(0, random.normalvariate(1, self.augment_contrast))
            im = adjust_contrast(im, factor)

        if self.augment_hue:
            # Clamp the hue shift to [-0.5, 0.5].
            factor = random.normalvariate(0, self.augment_hue)
            factor = min(0.5, max(-0.5, factor))
            im = adjust_hue(im, factor)

        if self.augment_saturation:
            factor = max(0, random.normalvariate(1, self.augment_saturation))
            im = adjust_saturation(im, factor)

        target = torch.cat([boxes, categories], dim=1)

    # Convert to tensor and normalize
    width, height = im.size
    raw_bytes = torch.ByteStorage.from_buffer(im.tobytes())
    data = torch.ByteTensor(raw_bytes)
    data = data.float().div(255).view(height, width, len(im.mode))
    data = data.permute(2, 0, 1)

    for channel, mean, std in zip(data, self.mean, self.std):
        channel.sub_(mean).div_(std)

    # Apply padding
    pad_w, pad_h = ((self.stride - side % self.stride) % self.stride
                    for side in im.size)
    data = F.pad(data, (0, pad_w, 0, pad_h))

    if not self.training:
        return data, image_id, ratio
    return data, target
def get_data(self, index, h_flip=False, target_im_size=688, square_img=False):
    """Build one sample dict at a requested scale, optionally flipped.

    Args:
        index: Sample index passed to ``self.get_raw_data``.
        h_flip: Mirror the image, boxes and proposals horizontally.
        target_im_size: Target size in pixels. With ``square_img`` both
            sides are resized to it; otherwise the longer side is (the
            resulting longer side is capped at 2000).
        square_img: Resize to a square, scaling x and y independently.

    Returns:
        dict with keys ``im_data``, ``gt_boxes``, ``gt_labels``,
        ``proposals``, ``prop_scores``, ``image_level_label``, ``im_scale``
        (a float, or ``[x_scale, y_scale]`` when ``square_img``),
        ``raw_img`` and ``id``.
    """
    (im, gt_boxes, gt_categories, proposals, prop_scores, image_id,
     loader_index) = self.get_raw_data(index)
    raw_img = im.copy()
    proposals, prop_scores = self.select_proposals(proposals, prop_scores)

    # rgb -> bgr
    im = im[:, :, ::-1]

    # horizontal flip
    if h_flip:
        im = im[:, ::-1, :]
        raw_img = raw_img[:, ::-1, :].copy()
        width = im.shape[1]

        # Flip copies: the arrays come from get_raw_data / select_proposals
        # and may be shared with the dataset, so they must not be edited in
        # place.
        gt_boxes = gt_boxes.copy()
        proposals = proposals.copy()

        flipped_xmin = width - gt_boxes[:, 2]
        flipped_xmax = width - gt_boxes[:, 0]
        gt_boxes[:, 0] = flipped_xmin
        gt_boxes[:, 2] = flipped_xmax

        flipped_xmin = width - proposals[:, 2]
        flipped_xmax = width - proposals[:, 0]
        proposals[:, 0] = flipped_xmin
        proposals[:, 2] = flipped_xmax

    # cast to float type and mean subtraction
    im = im.astype(np.float32, copy=False)
    im -= np.array([[[102.9801, 115.9465, 122.7717]]])

    # image rescale
    im_shape = im.shape
    im_size_max = np.max(im_shape[0:2])

    if square_img:
        x_scale = target_im_size / im_shape[1]
        y_scale = target_im_size / im_shape[0]
        im = cv2.resize(im, None, None, fx=x_scale, fy=y_scale,
                        interpolation=cv2.INTER_LINEAR)
        box_scale = np.array([x_scale, y_scale, x_scale, y_scale])
        gt_boxes = gt_boxes * box_scale
        proposals = proposals * box_scale
        im_scale = [x_scale, y_scale]
    else:
        im_scale = target_im_size / float(im_size_max)
        if im_size_max * im_scale > 2000:
            im_scale = 2000 / im_size_max
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        gt_boxes = gt_boxes * im_scale
        proposals = proposals * im_scale

    # to tensor
    data = torch.tensor(im, dtype=torch.float32)
    data = data.permute(2, 0, 1).contiguous()
    gt_boxes = torch.tensor(gt_boxes, dtype=torch.float32)
    proposals = torch.tensor(proposals, dtype=torch.float32)
    prop_scores = torch.tensor(prop_scores, dtype=torch.float32)
    gt_categories = torch.tensor(gt_categories, dtype=torch.long)

    # Multi-hot image-level label over all classes.
    image_level_label = torch.zeros(self.num_classes, dtype=torch.uint8)
    image_level_label[gt_categories] = 1

    return {
        'im_data': data,
        'gt_boxes': gt_boxes,
        'gt_labels': gt_categories,
        'proposals': proposals,
        'prop_scores': prop_scores,
        'image_level_label': image_level_label,
        'im_scale': im_scale,
        'raw_img': raw_img,
        'id': image_id
    }
# initialize Generator & Discriminator
netG = Generator().to(device)
weights_init(netG)
print(netG)

netD = Discriminator().to(device)
weights_init(netD)
print(netD)

# load ".off" files
volumes = d.getAll(obj=obj, train=True, is_local=is_local, obj_ratio=obj_ratio)
print('Using ' + obj + ' Data')
# Append a trailing channel axis. `np.float` was only an alias for the
# builtin float (float64) and was removed in NumPy 1.24, so name the dtype
# explicitly.
volumes = volumes[..., np.newaxis].astype(np.float64)
data = torch.from_numpy(volumes)
# Move the new channel axis from the last position to position 1.
data = data.permute(0, 4, 1, 2, 3)
data = data.type(torch.FloatTensor)

# choose loss function
criterion = nn.BCELoss()
criterion2 = nn.MSELoss()

# fake/real labels
real_label = 1
fake_label = 0

# setup optimizers
optG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
optD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
def __getitem__(self, index):
    """Return one query image plus its support images for few-shot testing.

    One class present in the image is chosen (deterministically), the gt
    boxes are restricted to that class, and ``self.testing_shot`` support
    images of that class are drawn from ``self.support_pool`` with a
    generator seeded by ``index``.

    Returns:
        ``(data, im_info, gt_boxes, num_boxes, supports, all_cls_gt_boxes)``
        where ``data`` is viewed as ``(3, H, W)``, ``im_info`` as a length-3
        vector, ``gt_boxes`` holds only the boxes of the chosen class,
        ``num_boxes`` is always 0, ``supports`` has shape
        ``(testing_shot, 3, support_im_size, support_im_size)`` and
        ``all_cls_gt_boxes`` is the unfiltered copy of the gt boxes.

    Raises:
        ValueError: if the image has no gt boxes, or the support pool holds
            fewer than ``self.testing_shot`` images (from ``sample``).
    """
    index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)

    data_height, data_width = data.size(1), data.size(2)
    data = data.permute(0, 3, 1, 2).contiguous().view(
        3, data_height, data_width)
    im_info = im_info.view(3)

    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    all_cls_gt_boxes = gt_boxes.clone()

    # Distinct class ids (column 4) in order of first appearance.
    cur_cls_id_list = list(dict.fromkeys(gt_boxes[:, 4].tolist()))

    # Local generators give the same draws as random.seed(...) followed by
    # random.sample(...), without resetting the process-wide RNG per item.
    chosen_cls = random.Random(0).sample(cur_cls_id_list, k=1)[0]

    # Keep only the boxes of the chosen class.
    # NOTE(review): the result keeps the dtype of blobs['gt_boxes']; confirm
    # downstream code does not rely on a different dtype.
    gt_boxes = gt_boxes[gt_boxes[:, 4] == chosen_cls]
    num_boxes = 0

    # get supports
    support_size = self.support_im_size
    support_data_all = np.zeros(
        (self.testing_shot, 3, support_size, support_size), dtype=np.float32)
    current_gt_class_id = int(gt_boxes[0][4])
    pool = self.support_pool[current_gt_class_id]

    selected_supports = random.Random(index).sample(pool, k=self.testing_shot)
    for i, _path in enumerate(selected_supports):
        support_im = imread(_path)[:, :, ::-1]  # rgb -> bgr
        target_size = np.min(support_im.shape[0:2])  # don't change the size
        support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS,
                                         target_size, cfg.TRAIN.MAX_SIZE)
        _h, _w = support_im.shape[0], support_im.shape[1]
        # Scale the longer side to support_size; the shorter side follows
        # and the remainder of the square buffer stays zero.
        if _h > _w:
            resize_scale = float(support_size) / float(_h)
            unfit_size = int(_w * resize_scale)
            support_im = cv2.resize(support_im, (unfit_size, support_size),
                                    interpolation=cv2.INTER_LINEAR)
        else:
            resize_scale = float(support_size) / float(_w)
            unfit_size = int(_h * resize_scale)
            support_im = cv2.resize(support_im, (support_size, unfit_size),
                                    interpolation=cv2.INTER_LINEAR)
        h, w = support_im.shape[0], support_im.shape[1]
        support_data_all[i, :, :h, :w] = np.transpose(support_im, (2, 0, 1))
    supports = torch.from_numpy(support_data_all)

    return data, im_info, gt_boxes, num_boxes, supports, all_cls_gt_boxes
def __getitem__(self, index):
    """Return one sample with gt boxes, secondary RoIs and key points.

    Training: the image is optionally cropped (when the roidb entry has
    ``need_crop``) and then zero-padded or trimmed to the aspect ratio in
    ``self.ratio_list_batch[index]``; gt boxes, secondary RoIs and key points
    are each re-sampled to a fixed number of rows.

    Returns:
        training: ``(padding_data, im_info, gt_boxes, sec_roi_boxes,
        key_points, num_sec_boxes, num_kp, img_name)`` with ``padding_data``
        channels-first and the three annotation tensors having
        ``max_num_box`` / ``max_num_sec_box`` / ``max_num_box`` rows.
        testing: the untouched annotation tensors, ``data`` viewed as
        ``(3, H, W)``, ``num_sec_boxes = sec_roi_boxes.size(1)`` and
        ``num_kp = 0``.
    """

    def _crop_start(box_min, box_max, trim_size, full_size):
        """Pick the first row/column of a ``trim_size`` crop around the boxes."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if box_region < trim_size:
            # the boxes fit in the crop: any start that still contains them
            lo = max(box_max - trim_size, 0)
            hi = min(box_min, full_size - trim_size)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # the boxes are larger than the crop: start inside their leading part
        extra = int((box_region - trim_size) / 2)
        if extra == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + extra))

    def _sample_rows(rows, count, message):
        """Draw exactly ``count`` rows; all-zero rows if there are none."""
        available = rows.size(0)
        if available == 0:
            # Previously this case hit an unbound index variable.
            return rows.new_zeros((count,) + tuple(rows.shape[1:]))
        # sample without replacement only when there are more than enough
        inds = npr.choice(np.arange(available), size=count,
                          replace=available <= count)
        assert inds.size == count, message
        return rows[inds]

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_action_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    # gt boxes and key points are deliberately not shuffled here
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    sec_roi_boxes = torch.from_numpy(blobs['sec_roi_boxes'])
    key_points = torch.from_numpy(blobs['key_points'])
    img_name = blobs['img_name']

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        num_sec_boxes = sec_roi_boxes.size(1)
        num_kp = 0
        return (data, im_info, gt_boxes, sec_roi_boxes, key_points,
                num_sec_boxes, num_kp, img_name)

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        # axis is both the box column of the "min" coordinate (0 = x1,
        # 1 = y1; the matching "max" is axis + 2) and the key-point
        # coordinate row that has to be shifted.
        if ratio < 1:
            # width << height: crop the height
            axis, full_size = 1, data_height
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
        else:
            # width >> height: crop the width
            axis, full_size = 0, data_width
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)

        box_min = int(torch.min(gt_boxes[:, axis]))
        box_max = int(torch.max(gt_boxes[:, axis + 2]))
        start = _crop_start(box_min, box_max, trim_size, full_size)

        # crop the image
        if axis == 1:
            data = data[:, start:(start + trim_size), :, :]
        else:
            data = data[:, :, start:(start + trim_size), :]

        # shift the annotations into crop coordinates and clip them to it
        shift = float(start)
        for boxes in (gt_boxes, sec_roi_boxes):
            for col in (axis, axis + 2):
                boxes[:, col] = boxes[:, col] - shift
                boxes[:, col].clamp_(0, trim_size - 1)
        key_points[:, axis, :] = key_points[:, axis, :] - shift
        key_points[:, axis, :].clamp_(0, trim_size - 1)

    # Based on the ratio, pad the image. data_height / data_width are the
    # pre-crop sizes on purpose, as in the original code.
    if ratio < 1:
        # width < height: pad at the bottom
        padding_data = torch.zeros(int(np.ceil(data_width / ratio)),
                                   data_width, 3, dtype=torch.float32)
        padding_data[:data_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # width > height: pad on the right
        padding_data = torch.zeros(data_height,
                                   int(np.ceil(data_height * ratio)), 3,
                                   dtype=torch.float32)
        padding_data[:, :data_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # square target: trim to the shorter side
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        sec_roi_boxes[:, :4].clamp_(0, trim_size)
        key_points.clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    num_sec_boxes = min(sec_roi_boxes.size(0), self.max_num_sec_box)
    num_kp = min(key_points.size(0), self.max_num_box)

    # Random sampling (too many rows) or sampling with repetition (too few).
    # The draw order sec RoIs -> key points -> gt boxes matches the original
    # so the random stream is consumed identically.
    padding_sec_roi_boxes = _sample_rows(
        sec_roi_boxes, self.max_num_sec_box,
        "Secondary RoIs are not of correct size")
    padding_kp = _sample_rows(
        key_points, self.max_num_box,
        "Key_points are not of correct size")
    padding_gt_boxes = _sample_rows(
        gt_boxes, self.max_num_box,
        "Gt_boxes are not of correct size")

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return (padding_data, im_info, padding_gt_boxes, padding_sec_roi_boxes,
            padding_kp, num_sec_boxes, num_kp, img_name)
def __getitem__(self, index):
    """Load one image with its boxes and per-box link information.

    The blob from ``get_minibatch`` is used here with dim 1 as height and
    dim 2 as width, channels last. In training the image is cropped (when
    the roidb entry has ``need_crop``) and zero-padded or trimmed to the
    batch aspect ratio, small boxes are dropped and the annotations are
    padded to ``self.max_num_box`` rows.

    :param index: position of the sample in the (ratio-sorted) index list
    :return:
        data: image tensor, channels first ``(3, H, W)``
        im_info: 1D tensor of length 3 (per the original notes:
            ``[h, w, scale_factor]``)
        gt_boxes: training -> ``(max_num_box, gt_boxes.size(1))`` zero-padded
            (per the original notes the columns are ``x1, y1, x2, y2, cls``);
            testing -> placeholder ``[1, 1, 1, 1, 1]``
        num_boxes: number of valid rows in ``gt_boxes`` (0 when testing)
        box_info: same row layout as ``gt_boxes`` (per the original notes:
            ``contactstate, handside, magnitude, unitdx, unitdy``);
            placeholder when testing
    """

    def unison_shuffled_copies(a, b):
        """Shuffle two equally long arrays with the same permutation."""
        assert len(a) == len(b)
        p = np.random.permutation(len(a))
        return a[p], b[p]

    def _crop_start(box_min, box_max, trim_size, full_size):
        """Pick the first row/column of a ``trim_size`` crop around the boxes."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if box_region < trim_size:
            # the boxes fit in the crop: any start that still contains them
            lo = max(box_max - trim_size, 0)
            hi = min(box_min, full_size - trim_size)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # the boxes are larger than the crop: start inside their leading part
        extra = int((box_region - trim_size) / 2)
        if extra == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + extra))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # get one roidb entry, e.g. [{}]
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        box_info = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes, box_info

    # shuffle boxes and their link info together so rows stay aligned
    blobs['gt_boxes'], blobs['box_info'] = unison_shuffled_copies(
        blobs['gt_boxes'], blobs['box_info'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    box_info = torch.from_numpy(blobs['box_info'])

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        # axis is the box column of the "min" coordinate (0 = x1, 1 = y1);
        # the matching "max" coordinate is column axis + 2.
        if ratio < 1:
            # width < height: crop the height
            axis, full_size = 1, data_height
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
        else:
            # width > height: crop the width
            axis, full_size = 0, data_width
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)

        box_min = int(torch.min(gt_boxes[:, axis]))
        box_max = int(torch.max(gt_boxes[:, axis + 2]))
        start = _crop_start(box_min, box_max, trim_size, full_size)

        # crop the image
        if axis == 1:
            data = data[:, start:(start + trim_size), :, :]
        else:
            data = data[:, :, start:(start + trim_size), :]

        # shift the boxes into crop coordinates and clip them to the crop
        shift = float(start)
        for col in (axis, axis + 2):
            gt_boxes[:, col] = gt_boxes[:, col] - shift
            gt_boxes[:, col].clamp_(0, trim_size - 1)

    # Based on the width/height ratio, pad the image. data_height /
    # data_width are the pre-crop sizes on purpose, as in the original.
    if ratio < 1:
        padding_data = torch.zeros(int(np.ceil(data_width / ratio)),
                                   data_width, 3, dtype=torch.float32)
        padding_data[:data_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)  # update im_info
    elif ratio > 1:
        padding_data = torch.zeros(data_height,
                                   int(np.ceil(data_height * ratio)), 3,
                                   dtype=torch.float32)
        padding_data[:, :data_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # check the bounding box: drop boxes narrower AND shorter than 10 px
    not_keep = ((gt_boxes[:, 2] - gt_boxes[:, 0]) < 10) & (
        (gt_boxes[:, 3] - gt_boxes[:, 1]) < 10)
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.zeros(self.max_num_box, gt_boxes.size(1),
                                   dtype=torch.float32)
    box_info_padding = torch.zeros(self.max_num_box, box_info.size(1),
                                   dtype=torch.float32)
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        box_info = box_info[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
        box_info_padding[:num_boxes, :] = box_info[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes, box_info_padding
def __getitem__(self, index):
    """Return one sample, optionally with crowdsourced class labels.

    In training the image is cropped (when the roidb entry has
    ``need_crop``) and zero-padded or trimmed to the batch aspect ratio;
    degenerate boxes are dropped and the remaining ones padded to
    ``self.max_num_box`` rows. When ``cfg.LABEL_SOURCE == 2`` the per-box
    crowdsourced labels are filtered and padded in step with the boxes.

    Returns:
        training: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``,
        plus ``padding_crowdsourced_classes`` of shape
        ``(max_num_box, cfg.NUM_ANNOTATOR)`` when ``cfg.LABEL_SOURCE == 2``.
        testing: ``(data, im_info, gt_boxes, num_boxes)`` with a placeholder
        ``gt_boxes`` and ``num_boxes = 0``.
        ``padding_data`` / ``data`` are channels first and ``im_info`` is a
        length-3 vector.
    """

    def _crop_start(box_min, box_max, trim_size, full_size):
        """Pick the first row/column of a ``trim_size`` crop around the boxes."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if box_region < trim_size:
            # the boxes fit in the crop: any start that still contains them
            lo = max(box_max - trim_size, 0)
            hi = min(box_min, full_size - trim_size)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # the boxes are larger than the crop: start inside their leading part
        extra = int((box_region - trim_size) / 2)
        if extra == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + extra))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    # blobs keys used below: 'data' (image), 'gt_boxes' (per the original
    # notes [:, :5] = x1, y1, x2, y2, cls), 'im_info' and, for
    # LABEL_SOURCE == 2, 'crowdsourced_classes'.
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # Prepare the training labels. The boxes are deliberately not shuffled,
    # so they stay row-aligned with the crowdsourced labels.
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    use_crowd_labels = cfg.LABEL_SOURCE == 2
    if use_crowd_labels:
        crowdsourced_classes = torch.from_numpy(blobs['crowdsourced_classes'])
    else:
        crowdsourced_classes = None

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    ratio = self.ratio_list_batch[index]

    # crop (per the original note: needed when the aspect ratio is outside
    # the 0.5-2 range)
    if self._roidb[index_ratio]['need_crop']:
        # axis is the box column of the "min" coordinate (0 = x1, 1 = y1);
        # the matching "max" coordinate is column axis + 2.
        if ratio < 1:
            # width << height: crop the height
            axis, full_size = 1, data_height
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
        else:
            # width >> height: crop the width
            axis, full_size = 0, data_width
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)

        box_min = int(torch.min(gt_boxes[:, axis]))
        box_max = int(torch.max(gt_boxes[:, axis + 2]))
        start = _crop_start(box_min, box_max, trim_size, full_size)

        # crop the image
        if axis == 1:
            data = data[:, start:(start + trim_size), :, :]
        else:
            data = data[:, :, start:(start + trim_size), :]

        # shift the boxes into crop coordinates and clip them to the crop
        shift = float(start)
        for col in (axis, axis + 2):
            gt_boxes[:, col] = gt_boxes[:, col] - shift
            gt_boxes[:, col].clamp_(0, trim_size - 1)

    # Based on the ratio, pad the image. data_height / data_width are the
    # pre-crop sizes on purpose, as in the original code.
    if ratio < 1:
        # width < height: pad at the bottom
        padding_data = torch.zeros(int(np.ceil(data_width / ratio)),
                                   data_width, 3, dtype=torch.float32)
        padding_data[:data_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # width > height: pad on the right
        padding_data = torch.zeros(data_height,
                                   int(np.ceil(data_height * ratio)), 3,
                                   dtype=torch.float32)
        padding_data[:, :data_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # check the bounding box: drop boxes with zero width or zero height
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    # NOTE(review): an integer buffer truncates fractional box coordinates,
    # while the sample output recorded with this code showed float boxes
    # (e.g. 108.8, 33.6, 566.4, 286.4, 8.0). Kept as-is because callers may
    # depend on the dtype; confirm whether FloatTensor was intended.
    gt_boxes_padding = torch.LongTensor(self.max_num_box,
                                        gt_boxes.size(1)).zero_()
    if use_crowd_labels:
        padding_crowdsourced_classes = torch.zeros(
            self.max_num_box, cfg.NUM_ANNOTATOR, dtype=torch.float32)

    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
        if use_crowd_labels:
            # Apply the same filter as for the boxes; otherwise the labels
            # drift out of step as soon as a degenerate box is dropped.
            crowdsourced_classes = crowdsourced_classes[keep]
            padding_crowdsourced_classes[:num_boxes] = \
                crowdsourced_classes[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    if use_crowd_labels:
        return (padding_data, im_info, gt_boxes_padding, num_boxes,
                padding_crowdsourced_classes)
    return padding_data, im_info, gt_boxes_padding, num_boxes
def __getitem__(self, index):
    """Return one image and its padded ground-truth boxes.

    In training the sample is looked up through ``self.ratio_index`` (per
    the original notes: samples are visited in aspect-ratio order). The
    image is cropped when the roidb entry has ``need_crop`` and then
    zero-padded or trimmed to the shared ratio in
    ``self.ratio_list_batch[index]``. Neither step resizes the image, so
    the scale stored in ``im_info`` is left untouched.

    Returns:
        training: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``
        with ``padding_data`` channels first, ``im_info`` a length-3 vector
        whose first two entries are updated to the final height/width,
        ``gt_boxes_padding`` of shape ``(max_num_box, gt_boxes.size(1))``
        and ``num_boxes`` a plain int.
        testing: ``(data, im_info, gt_boxes, num_boxes)`` with a placeholder
        ``gt_boxes`` and ``num_boxes = 0``.
    """

    def _crop_start(box_min, box_max, trim_size, full_size):
        """Pick the first row/column of a ``trim_size`` crop around the boxes."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if box_region < trim_size:
            # the boxes fit in the crop: any start that still contains them
            lo = max(box_max - trim_size, 0)
            hi = min(box_min, full_size - trim_size)
            if lo == hi:
                return lo
            return np.random.choice(range(lo, hi))
        # the boxes are larger than the crop: start inside their leading part
        extra = int((box_region - trim_size) / 2)
        if extra == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + extra))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # a list holding a single roidb dict, i.e. a minibatch of size one
    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # randomly shuffle the bounding boxes
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    # Using the batch-level ratio makes every image end up with the same
    # aspect ratio.
    ratio = self.ratio_list_batch[index]

    if self._roidb[index_ratio]['need_crop']:
        # axis is the box column of the "min" coordinate (0 = x1, 1 = y1);
        # the matching "max" coordinate is column axis + 2.
        if ratio < 1:
            # width << height: crop the height
            axis, full_size = 1, data_height
            trim_size = min(int(np.floor(data_width / ratio)), data_height)
        else:
            # width >> height: crop the width
            axis, full_size = 0, data_width
            trim_size = min(int(np.ceil(data_height * ratio)), data_width)

        box_min = int(torch.min(gt_boxes[:, axis]))
        box_max = int(torch.max(gt_boxes[:, axis + 2]))
        start = _crop_start(box_min, box_max, trim_size, full_size)

        # crop the image
        if axis == 1:
            data = data[:, start:(start + trim_size), :, :]
        else:
            data = data[:, :, start:(start + trim_size), :]

        # shift the boxes into crop coordinates and clip them to the crop
        shift = float(start)
        for col in (axis, axis + 2):
            gt_boxes[:, col] = gt_boxes[:, col] - shift
            gt_boxes[:, col].clamp_(0, trim_size - 1)

    # Cropping alone does not guarantee the target ratio, so pad with
    # zeros. data_height / data_width are the pre-crop sizes on purpose,
    # as in the original code.
    if ratio < 1:
        # Target is taller than the image: the image sits at the top and
        # zeros go at the bottom, so box coordinates need no change.
        padding_data = torch.zeros(int(np.ceil(data_width / ratio)),
                                   data_width, 3, dtype=torch.float32)
        padding_data[:data_height, :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # Target is wider than the image: the image sits on the left and
        # zeros go on the right.
        padding_data = torch.zeros(data_height,
                                   int(np.ceil(data_height * ratio)), 3,
                                   dtype=torch.float32)
        padding_data[:, :data_width, :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # check the bounding box: drop boxes with zero width or zero height
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.zeros(self.max_num_box, gt_boxes.size(1),
                                   dtype=torch.float32)
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing: HWC -> CHW
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes
def train_epoch(self, epoch):
    """Train for one epoch and append the epoch metrics to ``train.csv``.

    For every batch: forward pass, cross-entropy loss, backward pass,
    gradient-norm clipping at 4 and an optimizer step. After the epoch the
    confusion matrix, accuracy and ROC AUC (from the class-1 probability)
    are computed over all batches and a row ``epoch, loss, auc, accuracy``
    is added to ``<self.csv_path>/train.csv``.

    Args:
        epoch: epoch number, used for logging and as the CSV ``epoch`` value.
    """
    self.model.train()
    if not os.path.exists(self.csv_path):
        os.makedirs(self.csv_path)
    train_csv = os.path.join(self.csv_path, 'train.csv')

    pred_list, target_list, loss_list, pos_list = [], [], [], []
    uses_dist = self.cfig['model_name'] in ['disrnn']
    is_rnn = self.cfig['model_name'][-3:] == 'rnn'
    criterion = nn.CrossEntropyLoss()

    print ('epoch: ', epoch)
    for batch_idx, item in enumerate(self.train_loader):
        if uses_dist:
            inputs, target, dist = item
            inputs, target, dist = (inputs.to(self.device),
                                    target.to(self.device),
                                    dist.to(self.device))
        else:
            inputs, target, _ = item
            inputs, target = inputs.to(self.device), target.to(self.device)

        if is_rnn:
            # recurrent models take the first two axes swapped
            inputs = inputs.permute([1, 0, 2, 3, 4])

        self.optim.zero_grad()
        pred = self.model(inputs, dist) if uses_dist else self.model(inputs)
        # dim=1 is what the implicit-dim call resolved to for 2-D logits
        pred_prob = F.softmax(pred, dim=1)

        if batch_idx == 0:
            print ('data.shape', inputs.shape)
            print ('pred.shape', pred.shape)
            print('Epoch: ', epoch)

        loss = criterion(pred, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 4)
        self.optim.step()

        # The unused progress string was dropped: it indexed the scalar
        # loss with `loss.data[0]`, which raises on 0-dim tensors.
        pred_cls = pred.detach().max(1)[1]
        pos_list += pred_prob[:, 1].detach().cpu().numpy().tolist()
        pred_list += pred_cls.cpu().numpy().tolist()
        target_list += target.detach().cpu().numpy().tolist()
        loss_list.append(loss.item())

    # Best-effort debug print of the distance-LSTM parameters; models
    # without them, or without gradients yet, only print the fallback.
    # NOTE(review): the original indentation of this block was lost; it is
    # assumed to run once per epoch rather than once per batch.
    try:
        print (1000 * self.model.dislstmcell.a.grad, ' a grad')
        print (self.model.dislstmcell.a.data, self.model.dislstmcell.c.data)
        print (1000 * self.model.dislstmcell.c.grad, 'c grad')
    except (AttributeError, TypeError):
        print ('a.grad none')

    print (confusion_matrix(target_list, pred_list))
    accuracy = accuracy_score(target_list, pred_list)
    fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
    roc_auc = metrics.auc(fpr, tpr)

    # ------------------------- save to csv ----------------------- #
    columns = ['epoch', 'loss', 'auc', 'accuracy']
    new_values = {
        'epoch': epoch,
        'loss': np.mean(loss_list),
        'auc': roc_auc,
        'accuracy': accuracy,
    }
    if os.path.exists(train_csv):
        previous = pd.read_csv(train_csv)
        history = {name: previous[name].tolist() for name in columns}
    else:
        history = {name: [] for name in columns}

    log = pd.DataFrame()
    for name in columns:
        log[name] = history[name] + [new_values[name]]

    print ('train accuracy: ', accuracy, 'train auc: ', roc_auc)
    log.to_csv(train_csv)
X=torch.zeros((1,)).cpu(), Y=torch.ones((1, 2)).cpu(), opts=dict( xlabel='Epoch', ylabel='Gradient Norm', title='Gradien Norm - Step', legend=['D GN', 'G GN'] ) ) for epoch in range(args.niter): for i, (data, target) in enumerate(dataloader, 0): # permute B X D X C x H x W ==> B X C X D x H x W data = data.permute(0,2,1,3,4) ############################ # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z))) ########################### # train with real netD.zero_grad() real_cpu = data.to(device) batch_size = real_cpu.size(0) label = torch.full((batch_size,), real_label, device=device) # print('input ', real_cpu.shape) # print('label', label.shape) output = netD(real_cpu) # print('pred', output.shape) errD_real = criterion(output, label) errD_real.backward()
def eval_epoch(self, epoch):
    """Evaluate the model on ``self.val_loader`` and log metrics to ``eval.csv``.

    The model is put in eval mode and every validation batch is scored with
    cross-entropy loss.  Accuracy, the confusion matrix and ROC AUC over all
    batches are printed and one row (epoch, mean loss, auc, accuracy) is
    appended to ``<self.csv_path>/eval.csv``.  The best AUC seen so far and
    its row position are printed last.

    The forward passes run under ``torch.no_grad()``: no parameters are
    updated here, so building autograd graphs only wastes memory.

    Args:
        epoch: Epoch number stored in the CSV row.
    """
    self.model.eval()
    os.makedirs(self.csv_path, exist_ok=True)
    eval_csv = os.path.join(self.csv_path, 'eval.csv')
    pred_list, target_list, loss_list, pos_list = [], [], [], []

    uses_dist = self.cfig['model_name'] in ['disrnn']
    is_rnn = self.cfig['model_name'][-3:] == 'rnn'
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for batch_idx, item in enumerate(self.val_loader):
            if uses_dist:
                data, target, dist = item
                data, target, dist = (data.to(self.device),
                                      target.to(self.device),
                                      dist.to(self.device))
                if batch_idx == 0:
                    print(dist.shape)
            else:
                data, target, _ = item  # third element (sample ID) is unused
                data, target = data.to(self.device), target.to(self.device)
            if is_rnn:
                # Recurrent models get the first two axes swapped.
                data = data.permute([1, 0, 2, 3, 4])

            pred = self.model(data, dist) if uses_dist else self.model(data)
            # Explicit dim: the implicit-dim form of softmax is deprecated.
            pred_prob = F.softmax(pred, dim=1)
            loss = criterion(pred, target)

            pred_cls = pred.max(1)[1]
            pos_list += pred_prob[:, 1].cpu().numpy().tolist()
            pred_list += pred_cls.cpu().numpy().tolist()
            target_list += target.cpu().numpy().tolist()
            loss_list.append(loss.item())

    accuracy = accuracy_score(target_list, pred_list)
    print(confusion_matrix(target_list, pred_list))
    fpr, tpr, threshold = metrics.roc_curve(target_list, pos_list)
    roc_auc = metrics.auc(fpr, tpr)

    # ------------------------- save to csv -----------------------#
    columns = ['epoch', 'loss', 'auc', 'accuracy']
    if os.path.exists(eval_csv):
        history = pd.read_csv(eval_csv)
        records = {key: history[key].tolist() for key in columns}
    else:
        records = {key: [] for key in columns}
    records['epoch'].append(epoch)
    records['loss'].append(np.mean(loss_list))
    records['auc'].append(roc_auc)
    records['accuracy'].append(accuracy)
    pd.DataFrame(records, columns=columns).to_csv(eval_csv)

    print('val accuracy: ', accuracy, 'val auc: ', roc_auc)
    best_auc = max(records['auc'])
    print('max val auc at: ', best_auc, records['auc'].index(best_auc))
def format(self, rgb_array):
    """Turn an RGB image array into flattened, 0-1 scaled rows on the GPU.

    The array is converted to a float tensor on the ``'cuda'`` device,
    divided by 255, has its third axis moved to the front, and is reshaped
    to rows of ``img_size * img_size * 3`` values (``img_size`` is a
    module-level name).

    Args:
        rgb_array: NumPy array with three axes whose element count is a
            multiple of ``3 * img_size * img_size``.

    Returns:
        A 2-D CUDA float tensor with ``img_size * img_size * 3`` columns.
    """
    pixels = torch.from_numpy(rgb_array).float().to(device='cuda')
    pixels.div_(255)
    channels_first = pixels.permute(2, 0, 1)
    images = channels_first.reshape(-1, 3, img_size, img_size)
    flat_len = img_size * img_size * 3
    return images.reshape(-1, flat_len)
def _getitem_unfixed_size(self, index):
    """Load one sample, cropping/padding it to the aspect ratio of its batch.

    In training mode the ground-truth grasps and boxes found in the blobs are
    shuffled, the image is optionally cropped (``need_crop`` in the roidb
    entry) and then zero-padded so it matches ``self.ratio_list_batch[index]``.
    Boxes and grasps are shifted/filtered accordingly and copied into
    fixed-size, zero-padded tensors.

    Args:
        index: Position of the sample in the (ratio-sorted) dataset.

    Returns:
        Training: ``(padding_data, im_info, gt_boxes_padding,
        gt_grasps_padding, num_boxes, num_grasps, rel_mat,
        gt_grasp_inds_padding)``.
        Otherwise: the channel-first image, ``im_info`` and constant
        placeholder tensors in the same positions.
    """

    def _crop_start(box_min, box_max, extent, trim_size):
        """Pick the crop origin along one axis so the boxes stay in view."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if (box_region - trim_size) < 0:
            # All boxes fit in the crop window: any origin that keeps them
            # fully inside is allowed.
            start_min = max(box_max - trim_size, 0)
            start_max = min(box_min, extent - trim_size)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        # Boxes span more than the window: start near the first box.
        start_add = int((box_region - trim_size) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes, self.training)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        # No ground truth at test time: return placeholders.
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(4)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        gt_grasps = torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1])
        gt_grasp_inds = torch.FloatTensor([0])
        num_boxes = 0
        num_grasps = 0
        rel_mat = torch.FloatTensor([0])
        return data, im_info, gt_boxes, gt_grasps, num_boxes, num_grasps, \
            rel_mat, gt_grasp_inds

    # Randomly shuffle grasps and boxes.  np.random.permutation is used
    # because shuffling a ``range`` in place fails on Python 3.
    gt_grasps = None
    if 'gt_grasps' in blobs:
        shuffle_inds_gr = torch.from_numpy(
            np.random.permutation(blobs['gt_grasps'].shape[0])).long()
        gt_grasps = torch.from_numpy(blobs['gt_grasps'])
        gt_grasps = gt_grasps[shuffle_inds_gr]
        if 'gt_grasp_inds' in blobs:
            gt_grasps_inds = torch.from_numpy(blobs['gt_grasp_inds'])
            gt_grasps_inds = gt_grasps_inds[shuffle_inds_gr]

    gt_boxes = None
    if 'gt_boxes' in blobs:
        shuffle_inds_bb = torch.from_numpy(
            np.random.permutation(blobs['gt_boxes'].shape[0])).long()
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        gt_boxes = gt_boxes[shuffle_inds_bb]

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    ratio = self.ratio_list_batch[index]

    # If the image needs cropping, crop it to the target size.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # data_width << data_height: crop the height.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _crop_start(min_y, max_y, data_height, trim_size)
            data = data[:, y_s:(y_s + trim_size), :, :]

            if gt_boxes is not None:
                # Shift and clamp the y coordinates (last column is excluded).
                gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 1::2] -= float(y_s)
                gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 1::2].clamp_(
                    0, trim_size - 1)

            if gt_grasps is not None:
                gt_grasps[:, 1::2] -= float(y_s)
                # Keep only grasps whose four y values lie inside the crop.
                keep = (((gt_grasps[:, 1::2] > 0) &
                         (gt_grasps[:, 1::2] < trim_size - 1)).sum(1) == 4)
                gt_grasps = gt_grasps[keep]
                shuffle_inds_gr = shuffle_inds_gr[keep]
                if 'gt_grasp_inds' in blobs:
                    gt_grasps_inds = gt_grasps_inds[keep]
        else:
            # data_width >> data_height: crop the width.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _crop_start(min_x, max_x, data_width, trim_size)
            data = data[:, :, x_s:(x_s + trim_size), :]

            if gt_boxes is not None:
                # Shift and clamp the x coordinates (last column is excluded).
                gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 0::2] -= float(x_s)
                gt_boxes[:, :(gt_boxes.size(1) - 1)][:, 0::2].clamp_(
                    0, trim_size - 1)

            if gt_grasps is not None:
                gt_grasps[:, 0::2] -= float(x_s)
                # Keep only grasps whose four x values lie inside the crop.
                # Both bounds must test the x columns (0::2); the upper bound
                # previously tested the y columns by mistake.
                keep = (((gt_grasps[:, 0::2] > 0) &
                         (gt_grasps[:, 0::2] < trim_size - 1)).sum(1) == 4)
                gt_grasps = gt_grasps[keep]
                shuffle_inds_gr = shuffle_inds_gr[keep]
                if 'gt_grasp_inds' in blobs:
                    gt_grasps_inds = gt_grasps_inds[keep]

    # Based on the ratio, pad the image with zeros.
    if ratio < 1:
        # data_width < data_height: pad at the bottom.
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        # Use the current (possibly cropped) height, not the pre-crop one.
        padding_data[:data.size(1), :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # data_width > data_height: pad on the right.
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        # Use the current (possibly cropped) width, not the pre-crop one.
        padding_data[:, :data.size(2), :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        if gt_boxes is not None:
            gt_boxes[:, :(gt_boxes.size(1) - 1)].clamp_(0, trim_size)
        if gt_grasps is not None:
            keep = (((gt_grasps > 0) & (gt_grasps < trim_size)).sum(1) == 8)
            gt_grasps = gt_grasps[keep]
            shuffle_inds_gr = shuffle_inds_gr[keep]
            if 'gt_grasp_inds' in blobs:
                gt_grasps_inds = gt_grasps_inds[keep]
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # grasp data
    num_grasps = 0
    gt_grasps_padding = torch.FloatTensor(self.max_num_grasp, 8).zero_()
    gt_grasp_inds_padding = torch.FloatTensor(self.max_num_grasp).zero_()
    if 'gt_grasps' in blobs:
        num_grasps = min(gt_grasps.size(0), self.max_num_grasp)
        gt_grasps_padding[:num_grasps, :] = gt_grasps[:num_grasps]
        if 'gt_grasp_inds' in blobs:
            gt_grasp_inds_padding[:num_grasps] = gt_grasps_inds[:num_grasps]

    # object detection data
    # 4 coordinates (xmin, ymin, xmax, ymax) and 1 label
    num_boxes = 0
    gt_boxes_padding = torch.FloatTensor(self.max_num_box, 5).zero_()
    rel_mat = torch.FloatTensor(self.max_num_box, self.max_num_box).zero_()
    if 'gt_boxes' in blobs:
        # Drop boxes that collapsed to zero width or height.
        not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
            gt_boxes[:, 1] == gt_boxes[:, 3])
        keep = torch.nonzero(not_keep == 0).view(-1)

        gt_boxes_padding = torch.FloatTensor(
            self.max_num_box, gt_boxes.size(1)).zero_()
        if keep.numel() != 0:
            gt_boxes = gt_boxes[keep]
            shuffle_inds_bb = shuffle_inds_bb[keep]

            num_boxes = min(gt_boxes.size(0), self.max_num_box)
            gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]

            # get relationship matrix
            if 'nodeinds' in blobs:
                for o1 in range(num_boxes):
                    for o2 in range(num_boxes):
                        ind_o1 = blobs['nodeinds'][shuffle_inds_bb[o1].item()]
                        ind_o2 = blobs['nodeinds'][shuffle_inds_bb[o2].item()]
                        if ind_o2 == ind_o1 or rel_mat[o1, o2].item() != 0:
                            continue
                        o1_children = blobs['children'][
                            shuffle_inds_bb[o1].item()]
                        o1_fathers = blobs['fathers'][
                            shuffle_inds_bb[o1].item()]
                        if ind_o2 in o1_children:
                            # o1 is o2's father
                            rel_mat[o1, o2] = cfg.VMRN.FATHER
                        elif ind_o2 in o1_fathers:
                            # o1 is o2's child
                            rel_mat[o1, o2] = cfg.VMRN.CHILD
                        else:
                            # o1 and o2 have no relationship
                            rel_mat[o1, o2] = cfg.VMRN.NOREL

            # Translate node indices stored with the grasps into the
            # 1-based position of the owning box in the returned box list.
            if 'gt_grasp_inds' in blobs:
                gt_grasp_inds_padding_ori = gt_grasp_inds_padding.clone()
                order2inds = dict(enumerate(blobs['nodeinds']))
                inds2order = dict(
                    zip(order2inds.values(), order2inds.keys()))
                shuffle2order = dict(
                    enumerate(shuffle_inds_bb.data.numpy()))
                order2shuffle = dict(
                    zip(shuffle2order.values(), shuffle2order.keys()))
                # make box index begin with 1
                for key in order2shuffle.keys():
                    order2shuffle[key] += 1
                for ind in blobs['nodeinds']:
                    gt_grasp_inds_padding[
                        gt_grasp_inds_padding_ori == float(ind)] = \
                        float(order2shuffle[inds2order[ind]])

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(4)

    return padding_data, im_info, gt_boxes_padding, gt_grasps_padding, \
        num_boxes, num_grasps, rel_mat, gt_grasp_inds_padding
# Device placement for the model when CUDA is requested.
if args.cuda:
    if args.gpu != -1:
        # A specific GPU index was given: run on that single device.
        torch.cuda.set_device(args.gpu)
        model = model.cuda()
    else:
        # gpu == -1: replicate the model with DataParallel over four
        # hard-coded device ids.
        device_id = [0, 1, 2, 3]
        torch.cuda.set_device(device_id[0])
        model = nn.DataParallel(model, device_ids=device_id).cuda()

# Adam optimiser; the learning rate is halved every 30 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=30, gamma=0.5)

# Smoke test: push one random batch through the model and print the result.
# NOTE(review): ``.to(args.gpu)`` is used regardless of ``args.cuda``, and
# -1 (the DataParallel case above) is not a valid device index - confirm.
data = torch.rand(2, 3, 5).float().to(args.gpu)
print("data", data, data.shape)
# Swap the last two axes: (2, 3, 5) -> (2, 5, 3).
data = data.permute(0, 2, 1)
print("data_p", data, data.shape)
model.train()
output = model(data)
print("output", output)

# Disabled training loop, kept for reference (truncated in the original):
# def train(model, loader, epoch):
#     scheduler.step()
#     model.train()
#     torch.set_grad_enabled(True)
#     correct = 0
#     dataset_size = 0
#     for batch_idx, (data, target) in enumerate(loader):
#         # print("data", data, data.shape, "target", target)
#         dataset_size += data.shape[0]
#         data, target = data.float(), target.long().squeeze()
def __getitem__(self, index):
    """Load one sample (domain-adaptation variant with ``need_backprop``).

    In training mode the ground-truth boxes are shuffled, the image is
    optionally cropped (``need_crop`` in the roidb entry) and then zero-padded
    to the aspect ratio ``self.ratio_list_batch[index]``; boxes are shifted
    and clamped accordingly and copied into a fixed-size, zero-padded tensor.

    Args:
        index: Position of the sample in the (ratio-sorted) dataset.

    Returns:
        Training: ``(padding_data, im_info, gt_boxes_padding, num_boxes,
        need_backprop)`` where ``padding_data`` is channel-first.
        Otherwise: the channel-first image, ``im_info``, a placeholder box,
        ``0`` boxes and ``need_backprop = 0`` (no ground truth is loaded).
    """

    def _crop_start(box_min, box_max, extent, trim_size):
        """Pick the crop origin along one axis, covering as much of the boxes as possible."""
        if box_min == 0:
            return 0
        # Extent spanned by all boxes along this axis.
        box_region = box_max - box_min + 1
        if (box_region - trim_size) < 0:
            # Boxes fit inside the crop window: choose the origin at random
            # within the range that keeps them fully inside.
            start_min = max(box_max - trim_size, 0)
            start_max = min(box_min, extent - trim_size)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        # Boxes span at least the window size.
        start_add = int((box_region - trim_size) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    # During training, samples are addressed through the ratio-sorted index
    # list (aspect ratios in ascending order).
    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    # minibatch_db is a one-element list holding the roidb dict of one image
    # (original author's notes: keys such as 'boxes', 'gt_classes',
    # 'gt_ishard', 'gt_overlaps', 'flipped', 'seg_areas').
    minibatch_db = [self._roidb[index_ratio]]
    # blobs (per the original author's notes) contains 'data' (image array
    # with a leading batch axis of one image), 'need_backprop', 'gt_boxes',
    # 'im_info' and 'img_id'.
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    # Image height and width.
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        # Not training: ground truth is not loaded.
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        need_backprop = 0
        return data, im_info, gt_boxes, num_boxes, need_backprop

    # Shuffle the order of the boxes, then move them into a tensor.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])
    need_backprop = blobs['need_backprop'][0]

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    # Target aspect ratio of the batch this sample belongs to.
    ratio = self.ratio_list_batch[index]

    # Crop the image if required: crop `data` and shift the box coordinates.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # Tall image (data_width << data_height): crop the height.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            # Size the long side (height) is cropped to.
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _crop_start(min_y, max_y, data_height, trim_size)
            data = data[:, y_s:(y_s + trim_size), :, :]

            # Box coordinates follow the crop.
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            # Keep boxes inside the image when they span more than the crop.
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # Wide image (data_width >> data_height): same along the width.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _crop_start(min_x, max_x, data_width, trim_size)
            data = data[:, :, x_s:(x_s + trim_size), :]

            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Based on the ratio, pad the image with zeros.
    if ratio < 1:
        # Tall image: pad at the bottom up to ceil(width / ratio) rows.
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        # Use the current (possibly cropped) height, not the pre-crop one.
        padding_data[:data.size(1), :, :] = data[0]
        # Update the stored image height.
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # Wide image: pad on the right up to ceil(height * ratio) columns.
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        # Use the current (possibly cropped) width, not the pre-crop one.
        padding_data[:, :data.size(2), :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Keep only boxes that still have a non-zero area.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    # Zero-initialised (max_num_box x box-columns) output tensor.
    gt_boxes_padding = torch.FloatTensor(
        self.max_num_box, gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # Move channels first; contiguous() gives a dense copy after permute.
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes, \
        need_backprop
def __getitem__(self, index):
    """Load one sample, cropping/padding it to the aspect ratio of its batch.

    In training mode the ground-truth boxes are shuffled, the image is
    optionally cropped (``need_crop`` in the roidb entry) and then zero-padded
    to the aspect ratio ``self.ratio_list_batch[index]``; boxes are shifted
    and clamped accordingly and copied into a fixed-size, zero-padded tensor.

    Args:
        index: Position of the sample in the (ratio-sorted) dataset.

    Returns:
        Training: ``(padding_data, im_info, gt_boxes_padding, num_boxes)``
        where ``padding_data`` is channel-first.
        Otherwise: the channel-first image, ``im_info``, a placeholder box
        and ``0`` boxes.
    """

    def _crop_start(box_min, box_max, extent, trim_size):
        """Pick the crop origin along one axis so the boxes stay in view."""
        if box_min == 0:
            return 0
        box_region = box_max - box_min + 1
        if (box_region - trim_size) < 0:
            # All boxes fit in the crop window: any origin that keeps them
            # fully inside is allowed.
            start_min = max(box_max - trim_size, 0)
            start_max = min(box_min, extent - trim_size)
            if start_min == start_max:
                return start_min
            return np.random.choice(range(start_min, start_max))
        # Boxes span at least the window size: start near the first box.
        start_add = int((box_region - trim_size) / 2)
        if start_add == 0:
            return box_min
        return np.random.choice(range(box_min, box_min + start_add))

    if self.training:
        index_ratio = int(self.ratio_index[index])
    else:
        index_ratio = index

    minibatch_db = [self._roidb[index_ratio]]
    blobs = get_minibatch(minibatch_db, self._num_classes)
    data = torch.from_numpy(blobs['data'])
    im_info = torch.from_numpy(blobs['im_info'])
    data_height, data_width = data.size(1), data.size(2)

    if not self.training:
        # No ground truth at test time: return placeholders.
        data = data.permute(0, 3, 1, 2).contiguous().view(
            3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1])
        num_boxes = 0
        return data, im_info, gt_boxes, num_boxes

    # Randomly shuffle the bounding boxes.
    np.random.shuffle(blobs['gt_boxes'])
    gt_boxes = torch.from_numpy(blobs['gt_boxes'])

    ########################################################
    # padding the input image to fixed size for each group #
    ########################################################
    ratio = self.ratio_list_batch[index]

    # If the image needs cropping, crop it to the target size.
    if self._roidb[index_ratio]['need_crop']:
        if ratio < 1:
            # data_width << data_height: crop the height.
            min_y = int(torch.min(gt_boxes[:, 1]))
            max_y = int(torch.max(gt_boxes[:, 3]))
            trim_size = int(np.floor(data_width / ratio))
            if trim_size > data_height:
                trim_size = data_height
            y_s = _crop_start(min_y, max_y, data_height, trim_size)
            data = data[:, y_s:(y_s + trim_size), :, :]

            # Shift the y coordinates of the boxes and clamp to the crop.
            gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
            gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
            gt_boxes[:, 1].clamp_(0, trim_size - 1)
            gt_boxes[:, 3].clamp_(0, trim_size - 1)
        else:
            # data_width >> data_height: crop the width.
            min_x = int(torch.min(gt_boxes[:, 0]))
            max_x = int(torch.max(gt_boxes[:, 2]))
            trim_size = int(np.ceil(data_height * ratio))
            if trim_size > data_width:
                trim_size = data_width
            x_s = _crop_start(min_x, max_x, data_width, trim_size)
            data = data[:, :, x_s:(x_s + trim_size), :]

            # Shift the x coordinates of the boxes and clamp to the crop.
            gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
            gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
            gt_boxes[:, 0].clamp_(0, trim_size - 1)
            gt_boxes[:, 2].clamp_(0, trim_size - 1)

    # Based on the ratio, pad the image with zeros.
    if ratio < 1:
        # data_width < data_height: pad at the bottom.
        padding_data = torch.FloatTensor(
            int(np.ceil(data_width / ratio)), data_width, 3).zero_()
        # Use the current (possibly cropped) height, not the pre-crop one.
        padding_data[:data.size(1), :, :] = data[0]
        im_info[0, 0] = padding_data.size(0)
    elif ratio > 1:
        # data_width > data_height: pad on the right.
        padding_data = torch.FloatTensor(
            data_height, int(np.ceil(data_height * ratio)), 3).zero_()
        # Use the current (possibly cropped) width, not the pre-crop one.
        padding_data[:, :data.size(2), :] = data[0]
        im_info[0, 1] = padding_data.size(1)
    else:
        # Square target: trim to the shorter side.
        trim_size = min(data_height, data_width)
        padding_data = data[0][:trim_size, :trim_size, :]
        gt_boxes[:, :4].clamp_(0, trim_size)
        im_info[0, 0] = trim_size
        im_info[0, 1] = trim_size

    # Drop boxes that collapsed to zero width or height.
    not_keep = (gt_boxes[:, 0] == gt_boxes[:, 2]) | (
        gt_boxes[:, 1] == gt_boxes[:, 3])
    keep = torch.nonzero(not_keep == 0).view(-1)

    gt_boxes_padding = torch.FloatTensor(
        self.max_num_box, gt_boxes.size(1)).zero_()
    if keep.numel() != 0:
        gt_boxes = gt_boxes[keep]
        num_boxes = min(gt_boxes.size(0), self.max_num_box)
        gt_boxes_padding[:num_boxes, :] = gt_boxes[:num_boxes]
    else:
        num_boxes = 0

    # permute trim_data to adapt to downstream processing
    padding_data = padding_data.permute(2, 0, 1).contiguous()
    im_info = im_info.view(3)

    return padding_data, im_info, gt_boxes_padding, num_boxes
def restore(cls, data):
    """Convert a normalised channel-first tensor back into a uint8 image array.

    The tensor is moved to the CPU, its first axis is moved to the end, the
    values are multiplied by 255, ``cls.RGB_MEAN`` is added in place and the
    result is cast to ``uint8``.

    Args:
        data: Tensor with three axes, channels first.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the channel axis last.
    """
    channels_last = data.detach().permute(1, 2, 0).cpu().numpy()
    image = channels_last * 255.
    # In-place add keeps the dtype of ``image``, exactly like ``+=``.
    np.add(image, cls.RGB_MEAN, out=image)
    return image.astype(np.uint8)