def extract_regions(self, index):
    regions = np.zeros((len(index), self.crop_size, self.crop_size, 3),
                       dtype='uint8')
    for i, sample in enumerate(self.samples[index]):
        regions[i] = crop_image(self.image, sample, self.crop_size,
                                self.padding)
    # NHWC uint8 -> NCHW float32, shifted toward zero mean.
    regions = regions.transpose(0, 3, 1, 2).astype('float32')
    regions = regions - 128.
    return regions
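# Illustrative check (not part of the original source): demonstrates the
# dtype/shape contract of extract_regions above. All names are local to
# this sketch; it only assumes numpy.
import numpy as np

def _demo_region_contract(n=4, crop_size=107):
    # Stand-in for the uint8 NHWC crops produced by crop_image().
    regions = np.random.randint(0, 256, (n, crop_size, crop_size, 3),
                                dtype='uint8')
    regions = regions.transpose(0, 3, 1, 2).astype('float32') - 128.
    assert regions.shape == (n, 3, crop_size, crop_size)
    assert regions.min() >= -128. and regions.max() <= 127.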
def __call__(self, frame: Frame, im: Image, last_frame: Frame,
             last_im: Image, debug=False, *args, **kwargs):
    if self.model is None:
        self._init_model()
    if last_frame is not None:
        ims1 = [crop_image(im, rectangle) for rectangle in frame.detections]
        ims2 = [crop_image(last_im, rectangle)
                for rectangle in last_frame.detections]
        embeddings1 = [self._get_embedding(im1) for im1 in ims1]
        embeddings2 = [self._get_embedding(im2) for im2 in ims2]
        for i, embedding1 in enumerate(embeddings1):
            # Nearest previous detection by L2 distance in embedding space.
            min_j, min_distance = (
                seq(enumerate(embeddings2))
                .map(lambda pair: (pair[0],
                                   np.linalg.norm(embedding1 - pair[1])))
                .min_by(lambda pair: pair[1]))
            if min_distance < self.threshold:
                frame.detections[i].id = last_frame.detections[min_j].id
    # Detections that stayed unmatched get a fresh track id.
    for detection in frame.detections:
        if detection.id == -1:
            detection.id = IDGenerator.next()
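# Minimal sketch of the matching rule above (illustrative helper, not the
# original API): each current embedding takes the id of its nearest
# previous embedding if the L2 distance is under the threshold, else a
# fresh id.
import numpy as np

def _match_ids(curr_embs, prev_embs, prev_ids, threshold, next_id):
    ids = []
    for e in curr_embs:
        dists = [np.linalg.norm(e - p) for p in prev_embs]
        j = int(np.argmin(dists)) if dists else -1
        if j >= 0 and dists[j] < threshold:
            ids.append(prev_ids[j])
        else:
            ids.append(next_id)  # unmatched: assign a new track id
            next_id += 1
    return ids, next_id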
def train(continue_epi=250000,
          policy_path="../Models/policy_template/50000_base_policy.pth"):
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    siam = SiameseNet(BaselineEmbeddingNet())
    pi = T_Policy(T_N)
    pi.load_state_dict(torch.load(policy_path))
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
    trainer = Trainer(ram)
    # continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]
        pos = ground_th
        reward_all = 0
        templates = []
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')
            template = crop_image(np.array(img), ground_th)
            # Seed the template pool with T_N copies of the first frame.
            for i in range(T_N):
                templates.append(template)
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_crop_g = np.array(img_crop_g).reshape(3, 107, 107)
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            # Score every template against the current frame, then let the
            # policy pick the best one.
            responses = siam(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
            action_tensor = pi(responses.permute(1, 0, 2, 3).cuda())
            del responses
            action = action_tensor.cpu().detach().numpy()
            action_id = np.argmax(action)
            template = templates[action_id]
            imo_g = np2tensor(np.array(template).reshape(1, 107, 107, 3))
            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255), Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            # Exploration: occasionally substitute the ground-truth delta.
            if np.random.random(1) < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            # Periodically refresh the template pool; the initial template
            # stays at index 0, the oldest refreshed one is dropped.
            if frame % INTERVRAL == 0:
                template = crop_image(np.array(img), pos_)
                templates.append(template)
                templates.pop(1)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            gt_frame = gt[frame]
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            elif 0.5 <= r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1
            imo_g_ = np.array(template).reshape(3, 107, 107)
            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos, reward,
                            npBN(imo_crop_l), npBN(imo_g_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a", encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]), Y=np.array([y]),
                     win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
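# Sketch of the piecewise IoU reward used in the loop above, factored out
# for clarity (hypothetical helper; _compute_iou is assumed to return a
# value in [0, 1]).
def _shaped_reward(iou, iou_prev):
    if iou > 0.7:
        return 1
    if 0.5 <= iou <= 0.7:
        # Reward only the improvement over the previous frame.
        return max(0, iou - iou_prev)
    return -1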
# argparse
parser = argparse.ArgumentParser(description='parameters for training')
parser.add_argument('--image_path', '-i',
                    default='/home/dell/Downloads/noise_raw/0001_NOISY_RAW/',
                    help='path to the folder of noisy raw images')
# parser.add_argument('--image_path', '-n', default='/home/dell/Downloads/noise_raw/split/', help='path to noise folder image')
parser.add_argument('--save_path', '-s',
                    default='/home/dell/Downloads/noise_raw/split/',
                    help='path to the output folder for cropped patches')
parser.add_argument('--crop_size', '-c', default=256, type=int,
                    help='crop size')
args = parser.parse_args()

# if not os.path.exists(args.save_path): os.mkdir(args.save_path)
files_ = glob.glob(os.path.join(args.image_path, "*"))
crop_size = (args.crop_size, args.crop_size)
for fi in files_:
    name = fi.split('/')[-1].split(".")[0]
    input_image = read_raw(fi)
    print(input_image.shape)
    pack_img = pack_raw(input_image)
    print(pack_img.shape)
    list_img_crop = crop_image(pack_img, crop_size)
    # print(input_image)
    # f = h5py.File(os.path.join(args.save_path, name + "__" + ".MAT"), "w")
    # f.create_dataset('y', data=input_image, dtype='float32')
    # f.close()
    # Save each crop as its own HDF5 (.MAT) file under dataset 'x'.
    for i in range(len(list_img_crop)):
        f = h5py.File(os.path.join(args.save_path, name + "_" + str(i) + ".MAT"), "w")
        f.create_dataset('x', data=list_img_crop[i], dtype='float32')
        f.close()
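# Optional read-back check (not in the original script): confirms each
# saved .MAT file is an HDF5 container holding the 'x' dataset written above.
def _check_crops(save_path):
    import glob, os
    import h5py
    for path in sorted(glob.glob(os.path.join(save_path, '*.MAT'))):
        with h5py.File(path, 'r') as f:
            print(os.path.basename(path), f['x'].shape, f['x'].dtype)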
def train():
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    # ilsvrc_home = '/media/ps/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]
        pos = ground_th
        reward_all = 0
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g = crop_image(np.array(img), pos)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_crop_g = np.array(img_crop_g).reshape(3, 107, 107)
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))
            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255), Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l, imo_g).squeeze(0).cpu().detach().numpy()
            if np.random.random(1) < var:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 1:
                    deta_pos = deta_pos_[0]
            if deta_pos[2] > 0.05 or deta_pos[2] < -0.05:
                deta_pos[2] = 0
            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_ = crop_image(np.array(img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)
            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            r = _compute_iou(pos_, gt[frame])
            if r > 0.7:
                reward = 1
            else:
                reward = -1
            ram.add(imo_crop_g, imo_g_, deta_pos, reward, imo_crop_l, imo_l_)
            reward_all += reward
            pos = pos_
        trainer.optimize()
        reward_100 += reward_all
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = np.array(td_error.cpu().detach().numpy())
            vis.line(X=np.array([train_step]), Y=np.array([y]),
                     win=line_loss, update='append')
            reward_100 = 0
        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
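# Hedged helper (not in the original): closed-form approximation of the
# exploration schedule above, where `var` decays by 5% every 10000 episodes
# and gates how often the ground-truth delta overrides the actor's output.
def _exploration_var(train_step, var0=0.5, decay=0.95, every=10000):
    return var0 * decay ** (train_step // every)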
def train(continue_epi=5600,
          policy_path="../models/Qnet/template_policy/{}_template_policy.pth",
          siamfc_path="../models/siamfc_pretrained.pth",
          gpu_id=0):
    # Replay buffer for reinforcement-learning samples.
    ram = ReplayBuffer()
    # Q-learning network.
    q = QNet_cir()
    # Q-learning target network.
    q_target = QNet_cir()
    # Optimizer.
    q_optimizer = torch.optim.Adam(q.parameters(), lr=0.0005)
    # SiamFC tracker.
    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    # Template selection network.
    pi = T_Policy(T_N)
    weights_init(pi)
    if continue_epi > 0:
        # Load pretrained weights for the template selection network.
        pretrained_pi_dict = torch.load(policy_path.format(continue_epi))
        pi_dict = pi.state_dict()
        pretrained_pi_dict = {
            k: v
            for k, v in pretrained_pi_dict.items() if k in pi_dict
        }  # and k.startswith("conv")}
        pi_dict.update(pretrained_pi_dict)
        pi.load_state_dict(pi_dict)
        pretrained_q_dict = torch.load(
            "../models/Qnet/QLT/{}_Qnet.pth".format(continue_epi))
        q_dict = q.state_dict()
        pretrained_q_dict = {
            k: v
            for k, v in pretrained_q_dict.items() if k in q_dict
        }
        q_dict.update(pretrained_q_dict)
        q.load_state_dict(q_dict)
    q_target.load_state_dict(q.state_dict())
    siam = SiameseNet(BaselineEmbeddingNet())
    # weights_init(siam)
    pretrained_siam = torch.load(siamfc_path)
    siam_dict = siam.state_dict()
    pretrained_siam = {
        k: v
        for k, v in pretrained_siam.items() if k in siam_dict
    }
    siam_dict.update(pretrained_siam)
    siam.load_state_dict(siam_dict)
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
        q = q.cuda()
        q_target = q_target.cuda()
    var = 0.3
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = cv2.cvtColor(cv2.imread(frame_name_list[0]), cv2.COLOR_BGR2RGB)
        img_size = (img.shape[1], img.shape[0])
        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]
        reward_all = 0
        templates = []
        for init_num in range(1):
            template = siamfc.init(img, ground_th)
            for i in range(T_N):
                templates.append(template)
        for frame in range(1, length):
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            with torch.no_grad():
                responses = siam(
                    torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                    torch.Tensor(np_imgs).float().cuda())
            pi_input = responses.permute(1, 0, 2, 3).cuda()
            del responses, np_imgs, np_img
            action = pi(pi_input).cpu()
            action_id = np.argmax(action.detach().numpy())
            template = templates[action_id]
            with torch.no_grad():
                siam_box_oral = siamfc.update(cv2_img, templates[0])
                siam_box = siamfc.update(cv2_img, template)
            # Convert (x1, y1, x2, y2) boxes to (x, y, w, h).
            siam_box_oral = [siam_box_oral[0], siam_box_oral[1],
                             siam_box_oral[2] - siam_box_oral[0],
                             siam_box_oral[3] - siam_box_oral[1]]
            siam_box = [siam_box[0], siam_box[1],
                        siam_box[2] - siam_box[0],
                        siam_box[3] - siam_box[1]]
            img_crop_l = crop_image(np.array(cv2_img), siam_box_oral)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            del img_crop_l
            expect = 0
            act_pos = np.zeros(7)
            a = np.random.randint(7)
            pos = np.array(siam_box_oral)
            deta = 5
            deta_pos = np.zeros(3)
            # Epsilon-greedy: early frames and every 30th frame take the
            # ground-truth direction; otherwise sample from the Q-network.
            if np.random.random(1) < var or frame <= 3 or frame % 30 == 0:
                expect = 1
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))[0]
                a_ind = np.argmax(np.abs(deta_pos_))
                if a_ind == 0:
                    a = 3 if deta_pos_[a_ind] > 0 else 4
                if a_ind == 1:
                    a = 1 if deta_pos_[a_ind] > 0 else 2
                if a_ind == 2:
                    a = 6 if deta_pos_[a_ind] > 0 else 5
            else:
                a = q.sample_action(imo_l)
            del imo_l
            act_pos[a] = 1
            if a == 1:
                deta_pos[1] += deta / siam_box_oral[3]
            if a == 2:
                deta_pos[1] -= deta / siam_box_oral[3]
            if a == 3:
                deta_pos[0] += deta / siam_box_oral[2]
            if a == 4:
                deta_pos[0] -= deta / siam_box_oral[2]
            if a == 5:
                deta_pos[2] -= deta / max(siam_box_oral[3], siam_box_oral[2])
            if a == 6:
                deta_pos[2] += deta / max(siam_box_oral[3], siam_box_oral[2])
            pos_ = move_crop(pos, deta_pos, img_size, rate)
            img_crop_l_ = crop_image(np.array(cv2_img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            iou_siam_oral = _compute_iou(siam_box_oral, gt[frame])
            if iou_siam_oral < 0.2:
                continue
            iou_siam = _compute_iou(siam_box, gt[frame])
            iou_ac = _compute_iou(pos_, gt[frame])
            reward_ac = 1 if iou_ac > iou_siam_oral else -1
            reward_t = 1 if iou_siam > iou_siam_oral else -1
            message = "iou_siam_oral: %.2f, iou_siam: %.2f, iou_ac: %.2f, expect: %d\n" % (
                iou_siam_oral, iou_siam, iou_ac, expect)
            with open("../logs/iou.txt", "a", encoding='utf-8') as f:
                f.write(message)
            # Refresh the template pool only when either the action or the
            # chosen template improved over the baseline and the baseline
            # box is reliable; the initial template stays at index 0.
            if (reward_ac > 0 or reward_t > 0) and iou_siam_oral > 0.6:
                template = siamfc.init(cv2_img, pos_)
                templates.append(template)
                templates.pop(1)
            log_pi = torch.log(action[0, action_id])
            pi.put_data((reward_t, log_pi))
            ram.put((npBN(imo_crop_l), act_pos, reward_ac, npBN(imo_l_)))
            reward_all += reward_ac
        with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            f.write('\n\n')
        if ram.size() >= 640:
            QNet_train(q, q_target, ram, q_optimizer)
            pi.train_policy()
        reward_100 += reward_all
        if train_step % 100 == 0 and train_step != 0:
            q_target.load_state_dict(q.state_dict())
            print("# of episode:{}, avg score : {:.1f}, buffer size:{}".format(
                train_step, reward_100 / 100, ram.size()))
            reward_100 = 0
        if train_step % 400 == 0 and train_step != 0:
            torch.save(q_target.state_dict(),
                       '../models/Qnet/QLT/' +
                       str(train_step + continue_epi) + '_Qnet.pth')
            torch.save(pi.state_dict(),
                       '../models/Qnet/template_policy/' +
                       str(train_step + continue_epi) + '_template_policy.pth')
            print("save model----{}".format(str(train_step + continue_epi)))
        if train_step % 10000 == 0:
            var = var * 0.95
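# Hedged summary of the discrete action mapping above (hypothetical helper):
# actions 1-6 nudge (dx, dy, dscale) by +/- deta normalized by the current
# box size; any other action (e.g. 0) leaves the delta at zero.
import numpy as np

def _action_to_delta(a, box, deta=5.0):
    x, y, w, h = box  # (x, y, w, h), as used for siam_box_oral above
    d = np.zeros(3)
    if a == 1: d[1] += deta / h
    if a == 2: d[1] -= deta / h
    if a == 3: d[0] += deta / w
    if a == 4: d[0] -= deta / w
    if a == 5: d[2] -= deta / max(w, h)
    if a == 6: d[2] += deta / max(w, h)
    return d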