def extract_regions(self, index):
        regions = np.zeros((len(index), self.crop_size, self.crop_size, 3),
                           dtype='uint8')
        for i, sample in enumerate(self.samples[index]):
            regions[i] = crop_image(self.image, sample, self.crop_size,
                                    self.padding)

        regions = regions.transpose(0, 3, 1, 2).astype('float32')
        regions = regions - 128.
        return regions
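
A minimal usage sketch (the `dataset` instance, index values, and the 107-pixel crop size are assumptions based on the call pattern above, not part of the snippet):

# Crops come back as an (N, 3, crop_size, crop_size) float32 batch,
# already shifted by -128 so values are roughly zero-centered.
regions = dataset.extract_regions(np.array([0, 1, 2]))
print(regions.shape, regions.dtype)  # e.g. (3, 3, 107, 107) float32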
Example #2
    def __call__(self, frame: Frame, im: Image, last_frame: Frame, last_im: Image, debug=False, *args,
                 **kwargs):
        if self.model is None:
            self._init_model()

        if last_frame is not None:
            ims1 = [crop_image(im, rectangle) for rectangle in frame.detections]
            ims2 = [crop_image(last_im, rectangle) for rectangle in last_frame.detections]

            embeddings1 = [self._get_embedding(im1) for im1 in ims1]
            embeddings2 = [self._get_embedding(im2) for im2 in ims2]
            for i, embedding1 in enumerate(embeddings1):
                min_j, min_distance = (seq(enumerate(embeddings2))
                                       .map(lambda pair: (pair[0], np.linalg.norm(embedding1 - pair[1])))
                                       .min_by(lambda pair: pair[1]))
                if min_distance < self.threshold:
                    frame.detections[i].id = last_frame.detections[min_j].id

        for detection in frame.detections:
            if detection.id == -1:
                detection.id = IDGenerator.next()
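
The `seq(...).map(...).min_by(...)` pipeline comes from the pyfunctional package; a dependency-free sketch of the same greedy nearest-neighbour lookup (same names as above, behaviour assumed identical) is:

# For each current embedding, find the closest previous embedding by L2 distance.
min_j, min_distance = min(
    ((j, np.linalg.norm(embedding1 - e2)) for j, e2 in enumerate(embeddings2)),
    key=lambda pair: pair[1])
if min_distance < self.threshold:
    frame.detections[i].id = last_frame.detections[min_j].id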
Example #3
def train(continue_epi=250000,
          policy_path="../Models/policy_template/50000_base_policy.pth"):
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    siam = SiameseNet(BaselineEmbeddingNet())
    pi = T_Policy(T_N)
    pi.load_state_dict(torch.load(policy_path))
    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
    trainer = Trainer(ram)
    # continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        pos = ground_th
        reward_all = 0
        templates = []
        for init_num in range(1):
            trainer.init_actor(img, ground_th)
            img = Image.open(frame_name_list[init_num]).convert('RGB')
            template = crop_image(np.array(img), ground_th)
            for i in range(T_N):
                templates.append(template)

        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g, _ = crop_image_actor_(np.array(img), pos)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_crop_g = np.array(img_crop_g).reshape(3, 107, 107)

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))

            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            responses = siam(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())

            action_tensor = pi(responses.permute(1, 0, 2, 3).cuda())
            del responses
            action = action_tensor.cpu().detach().numpy()
            action_id = np.argmax(action)
            template = templates[action_id]
            imo_g = np2tensor(np.array(template).reshape(1, 107, 107, 3))

            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l,
                                     imo_g).squeeze(0).cpu().detach().numpy()

            if np.random.random(1) < var or frame <= 5 or frame % 15 == 0:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 0.1:
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            if frame % INTERVRAL == 0:
                template = crop_image(np.array(img), pos_)
                templates.append(template)
                templates.pop(1)
            img_crop_l_, img_crop_g_, out_flag = crop_image_actor_(
                np.array(img), pos_)
            # if out_flag:
            #     pos = gt[frame]
            #     continue
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            gt_frame = gt[frame]
            r = _compute_iou(pos_, gt_frame)

            if r > 0.7:
                reward = 1
            elif 0.5 <= r <= 0.7:
                gt_pre = gt[frame - 1]
                r_pre = _compute_iou(pos, gt_pre)
                reward = max(0, r - r_pre)
            else:
                reward = -1
            imo_g_ = np.array(template).reshape(3, 107, 107)
            trainer.ram.add(npBN(imo_crop_g), npBN(imo_g_), deta_pos, reward,
                            npBN(imo_crop_l), npBN(imo_g_))
            # if r == 0:
            #     break
            reward_all += reward
            pos = pos_
            if out_flag or r == 0:
                pos = gt[frame]
        trainer.optimize()
        reward_100 += reward_all
        gc.collect()
        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()

            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            message = 'train_step: %d, reward_100: %d, td_error: %f \n' % (
                train_step, reward_100, y)
            with open("../logs/train_td_error.txt", "a",
                      encoding='utf-8') as f:
                f.write(message)
            vis.line(X=np.array([train_step]),
                     Y=np.array([y]),
                     win=line_loss,
                     update='append')
            reward_100 = 0

        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
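
`_compute_iou` is imported from the project and not shown here; a standard intersection-over-union for single [x, y, w, h] boxes (the box layout is inferred from how `gt` and `move_crop` are used above, so this is a sketch, not the project's actual code) would look like:

def _compute_iou(box_a, box_b):
    # Boxes are [x, y, w, h]; convert to corner coordinates.
    xa1, ya1 = box_a[0], box_a[1]
    xa2, ya2 = box_a[0] + box_a[2], box_a[1] + box_a[3]
    xb1, yb1 = box_b[0], box_b[1]
    xb2, yb2 = box_b[0] + box_b[2], box_b[1] + box_b[3]
    inter_w = max(0.0, min(xa2, xb2) - max(xa1, xb1))
    inter_h = max(0.0, min(ya2, yb2) - max(ya1, yb1))
    inter = inter_w * inter_h
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.0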
Example #4
    # argparse
    parser = argparse.ArgumentParser(description='parameters for training')
    parser.add_argument('--image_path', '-i', default='/home/dell/Downloads/noise_raw/0001_NOISY_RAW/', help='path to the folder of noisy RAW images')
    # parser.add_argument('--image_path', '-n', default='/home/dell/Downloads/noise_raw/split/', help='path to noise folder image')
    parser.add_argument('--save_path', '-s', default='/home/dell/Downloads/noise_raw/split/', help='path to the output folder for cropped images')
    parser.add_argument('--crop_size', '-c', default=256, type=int, help='Crop size')

    args = parser.parse_args()
    #
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    files_ = glob.glob(os.path.join(args.image_path, "*"))
    crop_size = (args.crop_size, args.crop_size)

    for fi in files_:
        name = fi.split('/')[-1].split(".")[0]
        input_image = read_raw(fi)
        print(input_image.shape)
        pack_img = pack_raw(input_image)
        print(pack_img.shape)
        list_img_crop = crop_image(pack_img, crop_size)
        # print(input_image)
        # f = h5py.File(os.path.join(args.save_path,name + "__"+".MAT"), "w")
        # f.create_dataset('y', data=input_image,dtype='float32')
        # f.close()
        for i in range(len(list_img_crop)):
            f = h5py.File(os.path.join(args.save_path, name + "_" + str(i) + ".MAT"), "w")
            f.create_dataset('x', data=list_img_crop[i], dtype='float32')
            f.close()
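
A quick way to verify the output (a sketch reusing `args.save_path` and `name` from the loop above; the `'x'` key matches the `create_dataset` call):

    # Hypothetical check: read the first saved crop back from its HDF5 container.
    with h5py.File(os.path.join(args.save_path, name + "_0.MAT"), "r") as f:
        crop = f['x'][...]
    print(crop.shape, crop.dtype)  # float32 crop of the packed RAW image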

Example #5
def train():
    ram = buffer.MemoryBuffer(MAX_BUFFER)
    trainer = Trainer(ram)
    continue_epi = 0
    if continue_epi > 0:
        trainer.load_models(continue_epi)
    var = 0.5
    start_time = time.time()
    vis = Visdom(env='td_error')
    line_loss = vis.line(np.arange(1))
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    # ilsvrc_home = '/media/ps/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = Image.open(frame_name_list[0]).convert('RGB')
        img_size = img.size

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        pos = ground_th
        reward_all = 0

        for init_num in range(1):
            trainer.init_actor(img, ground_th)

        for frame in range(1, length):
            img = Image.open(frame_name_list[frame]).convert('RGB')
            pos_ = pos
            img_crop_l, img_crop_g = crop_image(np.array(img), pos)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_crop_g = np.array(img_crop_g).reshape(3, 107, 107)

            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            imo_g = np2tensor(np.array(img_crop_g).reshape(1, 107, 107, 3))

            # img_l = np2tensor(np_img_l)
            # torch_image = loader(img.resize((255, 255),Image.ANTIALIAS)).unsqueeze(0).cuda().mul(255.)
            deta_pos = trainer.actor(imo_l,
                                     imo_g).squeeze(0).cpu().detach().numpy()

            if np.random.random(1) < var:
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))
                if np.max(abs(deta_pos_)) < 1:
                    deta_pos = deta_pos_[0]

            if abs(deta_pos[2]) > 0.05:
                deta_pos[2] = 0

            pos_ = move_crop(pos_, deta_pos, img_size, rate)
            img_crop_l_, img_crop_g_ = crop_image(np.array(img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            imo_g_ = np.array(img_crop_g_).reshape(3, 107, 107)

            # img_l_ = np.array(img_l_).reshape(1, 127, 127, 3)
            r = _compute_iou(pos_, gt[frame])

            if r > 0.7:
                reward = 1
            else:
                reward = -1

            ram.add(imo_crop_g, imo_g_, deta_pos, reward, imo_crop_l, imo_l_)
            reward_all += reward
            pos = pos_
        trainer.optimize()
        reward_100 += reward_all

        if train_step % 100 == 0:
            td_error = trainer.show_critic_loss()
            print(train_step, reward_100, 'td_error', td_error)
            y = td_error.cpu().detach().numpy()
            vis.line(X=np.array([train_step]),
                     Y=np.array([y]),
                     win=line_loss,
                     update='append')
            reward_100 = 0

        if train_step % 200 == 0:
            trainer.save_models(train_step)
        if train_step % 10000 == 0:
            var = var * 0.95
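
`np2tensor` is a project helper not shown in these snippets; a plausible minimal version, consistent with the (1, 107, 107, 3) arrays passed to it above (an assumption, not the project's actual code), is:

import torch

def np2tensor(np_img):
    # NHWC float batch -> NCHW float tensor, moved to GPU when available.
    t = torch.from_numpy(np_img.astype('float32')).permute(0, 3, 1, 2)
    return t.cuda() if torch.cuda.is_available() else t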
Example #6
def train(continue_epi=5600,
          policy_path="../models/Qnet/template_policy/{}_template_policy.pth",
          siamfc_path="../models/siamfc_pretrained.pth",
          gpu_id=0):
    # replay buffer for RL transitions
    ram = ReplayBuffer()
    # Q-learning network
    q = QNet_cir()
    # Q-learning target network
    q_target = QNet_cir()
    # optimizer
    q_optimizer = torch.optim.Adam(q.parameters(), lr=0.0005)
    # SiamFC tracker
    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    # template-selection policy network
    pi = T_Policy(T_N)
    weights_init(pi)

    if continue_epi > 0:
        # load pretrained weights for the template-selection policy
        pretrained_pi_dict = torch.load(policy_path.format(continue_epi))
        pi_dict = pi.state_dict()
        pretrained_pi_dict = {
            k: v
            for k, v in pretrained_pi_dict.items() if k in pi_dict
        }  # and k.startswith("conv")}
        pi_dict.update(pretrained_pi_dict)
        pi.load_state_dict(pi_dict)

        pretrained_q_dict = torch.load(
            "../models/Qnet/QLT/{}_Qnet.pth".format(continue_epi))
        q_dict = q.state_dict()
        pretrained_q_dict = {
            k: v
            for k, v in pretrained_q_dict.items() if k in q_dict
        }
        q_dict.update(pretrained_q_dict)
        q.load_state_dict(q_dict)

    q_target.load_state_dict(q.state_dict())

    siam = SiameseNet(BaselineEmbeddingNet())
    # weights_init(siam)
    pretrained_siam = torch.load(siamfc_path)
    siam_dict = siam.state_dict()
    pretrained_siam = {
        k: v
        for k, v in pretrained_siam.items() if k in siam_dict
    }
    siam_dict.update(pretrained_siam)
    siam.load_state_dict(siam_dict)

    if torch.cuda.is_available():
        pi = pi.cuda()
        siam = siam.cuda()
        q = q.cuda()
        q_target = q_target.cuda()

    var = 0.3
    train_ilsvrc_data_path = 'ilsvrc_train_new.json'
    ilsvrc_home = '/media/x/D/wujinming/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    # ilsvrc_home = '/media/ubuntu/DATA/Document/ILSVRC2015_VID/ILSVRC2015/Data/VID'
    reward_100 = 0
    train_dataset = ILSVRCDataset(train_ilsvrc_data_path,
                                  ilsvrc_home + '/train')
    for train_step in range(MAX_EPISODES):
        frame_name_list, gt, length = train_dataset.next()
        img = cv2.cvtColor(cv2.imread(frame_name_list[0]), cv2.COLOR_BGR2RGB)
        img_size = (img.shape[1], img.shape[0])

        ground_th = gt[0]
        rate = ground_th[2] / ground_th[3]

        reward_all = 0
        templates = []
        for init_num in range(1):
            template = siamfc.init(img, ground_th)
            for i in range(T_N):
                templates.append(template)

        for frame in range(1, length):
            cv2_img = cv2.cvtColor(cv2.imread(frame_name_list[frame]),
                                   cv2.COLOR_BGR2RGB)
            np_img = np.array(
                cv2.resize(cv2_img, (255, 255),
                           interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
            np_imgs = []
            for i in range(T_N):
                np_imgs.append(np_img)
            with torch.no_grad():
                responses = siam(
                    torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                    torch.Tensor(np_imgs).float().cuda())
            pi_input = responses.detach().permute(1, 0, 2, 3).cuda()
            del responses, np_imgs, np_img
            action = pi(pi_input).cpu()

            action_id = np.argmax(action.detach().numpy())
            template = templates[action_id]
            with torch.no_grad():
                siam_box_oral = siamfc.update(cv2_img, templates[0])
                siam_box = siamfc.update(cv2_img, template)
            siam_box_oral = [
                siam_box_oral[0], siam_box_oral[1],
                siam_box_oral[2] - siam_box_oral[0],
                siam_box_oral[3] - siam_box_oral[1]
            ]
            siam_box = [
                siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
                siam_box[3] - siam_box[1]
            ]

            img_crop_l = crop_image(np.array(cv2_img), siam_box_oral)
            imo_crop_l = np.array(img_crop_l).reshape(3, 107, 107)
            imo_l = np2tensor(np.array(img_crop_l).reshape(1, 107, 107, 3))
            del img_crop_l
            expect = 0
            act_pos = np.zeros(7)
            a = np.random.randint(7)
            pos = np.array(siam_box_oral)
            deta = 5
            deta_pos = np.zeros(3)
            if np.random.random(1) < var or frame <= 3 or frame % 30 == 0:
                expect = 1
                deta_pos_ = cal_distance(np.vstack([pos, pos]),
                                         np.vstack([gt[frame], gt[frame]]))[0]
                a_ind = np.argmax(np.abs(deta_pos_))
                if a_ind == 0:
                    a = 3 if deta_pos_[a_ind] > 0 else 4
                elif a_ind == 1:
                    a = 1 if deta_pos_[a_ind] > 0 else 2
                elif a_ind == 2:
                    a = 6 if deta_pos_[a_ind] > 0 else 5
            else:
                a = q.sample_action(imo_l)
            del imo_l
            act_pos[a] = 1
            if a == 1:
                deta_pos[1] += deta / siam_box_oral[3]
            elif a == 2:
                deta_pos[1] -= deta / siam_box_oral[3]
            elif a == 3:
                deta_pos[0] += deta / siam_box_oral[2]
            elif a == 4:
                deta_pos[0] -= deta / siam_box_oral[2]
            elif a == 5:
                deta_pos[2] -= deta / max(siam_box_oral[3], siam_box_oral[2])
            elif a == 6:
                deta_pos[2] += deta / max(siam_box_oral[3], siam_box_oral[2])
            pos_ = move_crop(pos, deta_pos, img_size, rate)
            img_crop_l_ = crop_image(np.array(cv2_img), pos_)
            imo_l_ = np.array(img_crop_l_).reshape(3, 107, 107)
            iou_siam_oral = _compute_iou(siam_box_oral, gt[frame])
            if iou_siam_oral < 0.2:
                continue
            iou_siam = _compute_iou(siam_box, gt[frame])
            iou_ac = _compute_iou(pos_, gt[frame])
            if iou_ac > iou_siam_oral:
                reward_ac = 1
            else:
                reward_ac = -1
            if iou_siam > iou_siam_oral:
                reward_t = 1
            else:
                reward_t = -1
            message = "iou_siam_oral: %2f, iou_siam: %2f, iou_ac: %2f ,expecte :%d\n" % (
                iou_siam_oral, iou_siam, iou_ac, expect)
            with open("../logs/iou.txt", "a", encoding='utf-8') as f:
                f.write(message)
            # assumed intent: refresh templates on a positive reward with a reliable
            # anchor box (the rewards are +/-1, so an unparenthesized `or` is always true)
            if (reward_ac > 0 or reward_t > 0) and iou_siam_oral > 0.6:
                template = siamfc.init(cv2_img, pos_)
                templates.append(template)
                templates.pop(1)
            log_pi = torch.log(action[0, action_id])
            pi.put_data((reward_t, log_pi))
            ram.put((npBN(imo_crop_l), act_pos, reward_ac, npBN(imo_l_)))
            reward_all += reward_ac
        with open("../logs/iou.txt", "a", encoding='utf-8') as f:
            f.write('\n\n')
        if ram.size() >= 640:
            QNet_train(q, q_target, ram, q_optimizer)
        pi.train_policy()
        reward_100 += reward_all
        if train_step % 100 == 0 and train_step != 0:
            q_target.load_state_dict(q.state_dict())
            print("# of episode:{}, avg score : {:.1f}, buffer size:{}".format(
                train_step, reward_100 / 100, ram.size()))
            reward_100 = 0
        if train_step % 400 == 0 and train_step != 0:
            torch.save(
                q_target.state_dict(), '../models/Qnet/QLT/' +
                str(train_step + continue_epi) + '_Qnet.pth')
            torch.save(
                pi.state_dict(), '../models/Qnet/template_policy/' +
                str(train_step + continue_epi) + '_template_policy.pth')
            print("save model----{}".format(str(train_step + continue_epi)))
        if train_step % 10000 == 0:
            var = var * 0.95
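
For reference, the seven discrete actions used above can be read as a lookup table (a sketch with the same arithmetic as the `elif` chain; action 0 leaves the box unchanged):

# action id -> (index into deta_pos, sign); 1-2 shift y, 3-4 shift x, 5-6 rescale
ACTION_TABLE = {1: (1, +1), 2: (1, -1),
                3: (0, +1), 4: (0, -1),
                5: (2, -1), 6: (2, +1)}

def apply_action(a, deta_pos, box, deta=5):
    # box is [x, y, w, h]; shifts are normalized by the box side they move along,
    # scale changes by the larger side, matching the training loop above.
    if a in ACTION_TABLE:
        axis, sign = ACTION_TABLE[a]
        scale = box[3] if axis == 1 else box[2] if axis == 0 else max(box[2], box[3])
        deta_pos[axis] += sign * deta / scale
    return deta_pos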