Example #1

import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# CNN, GRU, vis_tool, and get_config are project-local modules from this repo

class Trainer(object):
    def __init__(self, config, h_loader, r_loader, t_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.t_loader = t_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # defensive cast
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the pre-trained CNN feature extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()

    def train(self):
        # create optimizers
        cfig = get_config()
        opt = optim.RMSprop(filter(lambda p: p.requires_grad,
                                   self.gru.parameters()),
                            lr=self.lr,
                            weight_decay=self.weight_decay)

        start_time = time.time()

        criterion = nn.BCELoss()  # unused here: the GRU computes its loss internally

        max_acc = 0.

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                self.gru.train()

                h_video = h
                r_video = r


                # move clips to the GPU (r_video is loaded but unused in this version)
                h_video = Variable(h_video).cuda()
                r_video = Variable(r_video).cuda()

                self.gru.zero_grad()

                h_loss = self.gru(h_video)  # the GRU returns its training loss directly


                h_loss.backward()
                opt.step()

                step_end_time = time.time()

                epoch_loss.append(h_loss.item())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f' %
                      (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss))

                self.vis.plot(
                    'H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (h_loss.data).cpu().numpy())


            self.vis.plot(
                "Avg loss plot with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f" %
                (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.t_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint%d.pth' % (cfig.lr, epoch + 1))
                    for f in savelist:
                        np.save("./samples/" + f[0] + ".npy", f[1])
                    print("checkpoint saved")

    def test(self, t_loader):
        self.gru.eval()
        accuracy = 0.

        savelist = []

        total_len = len(t_loader)

        for step, (tv, label, filename) in enumerate(t_loader):
            filename = filename[0].split(".")[0]

            label = label.squeeze()

            start = 0
            end = 24  # scan the video in 24-frame windows

            correct = 0
            count = 0

            npy = np.zeros(tv.shape[1])  # per-frame 0/1 highlight mask

            while end < tv.shape[1]:

                t_video = Variable(tv[:, start:end, :, :, :]).cuda()
                predicted = self.gru(t_video)

                gt_label = label[start:end]

                # a 24-frame window counts as highlight when more than half
                # (12) of its ground-truth frames are highlight frames
                if len(gt_label[gt_label == 1.]) > 12:
                    gt_label = torch.ones(predicted.shape,
                                          dtype=torch.float32).cuda()
                else:
                    gt_label = torch.zeros(predicted.shape,
                                           dtype=torch.float32).cuda()

                # a low score means the window is predicted as highlight
                if predicted < 0.5:
                    npy[start:end] = 1.

                # binarize: below the threshold -> highlight (1), else 0
                predicted[predicted < 0.5] = 1.
                predicted[predicted >= 0.5] = 0.

                correct += (predicted == gt_label).item()

                start += 24
                end += 24
                count += 1

            accuracy += (correct / count) / total_len

            savelist.append([filename, npy])

        print("Accuracy:", round(accuracy, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, accuracy)

        return accuracy, savelist
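
A minimal sketch of how this Trainer might be driven; get_config() and get_loader()
are the repo's own helpers, but the exact call signatures here are assumptions
inferred from how the examples below use them:

# hypothetical entry point (signatures assumed, not confirmed by the source)
if __name__ == '__main__':
    config = get_config()
    h_loader, r_loader, t_loader = get_loader('../Dataset/HV', '../Dataset/RV',
                                              '../Dataset/testRV', 1)
    trainer = Trainer(config, h_loader, r_loader, t_loader)
    trainer.train()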
Example #2

import time

import numpy as np
import torch
import torch.optim as optim
from torch.autograd import Variable

# CNN, GRU, vis_tool, and get_config are project-local modules from this repo

class Trainer(object):
    def __init__(self, config, h_loader, r_loader, t_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.t_loader = t_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # defensive cast
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor
        # freeze the pre-trained CNN feature extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(
            torch.load('./samples/lr_0.0010_chkpoint1.pth')
        )  # resume the GRU from an earlier checkpoint

    def train(self):
        # create optimizers
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad,
                                self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()

        max_acc = 0.

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, h in enumerate(self.h_loader):
                # back to train mode after the test pass
                self.gru.train()

                h_video = h


                # highlight video
                h_video = Variable(h_video).cuda()

                self.gru.zero_grad()

                h_loss = self.gru(h_video)
                h_loss.backward()
                opt.step()

                step_end_time = time.time()

                epoch_loss.append(h_loss.item())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f' %
                      (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss))

                self.vis.plot(
                    'H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (h_loss.data).cpu().numpy())

            self.vis.plot(
                "Avg loss plot with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f" %
                (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.t_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint%d.pth' % (cfig.lr, epoch + 1))
                    for f in savelist:
                        np.save("./samples/" + f[0] + ".npy", f[1])
                    print("checkpoint saved")

    def test(self, t_loader):
        # test mode
        self.gru.eval()
        accuracy = 0.

        savelist = []

        total_len = len(t_loader)
        # number of test videos

        for step, (tv, label, filename) in enumerate(t_loader):
            filename = filename[0].split(".")[0]

            label = label.squeeze()
            # per-frame ground-truth highlight labels

            start = 0
            end = 30
            # scan the video in 30-frame windows

            correct = []
            ext_hv_frames = np.zeros(tv.shape[1])
            # one 0/1 entry per frame of the test video

            while end < tv.shape[1]:  # slide a 30-frame window across the video

                t_video = Variable(tv[:, start:end, :, :, :]).cuda()
                loss = self.gru(t_video)
                # scalar loss from the GRU

                gt_label = label[start:end]
                # ground-truth labels for frames start..end

                # a 30-frame window counts as highlight when more than 24 of
                # its ground-truth frames are highlight frames
                if len(gt_label[gt_label == 1.]) > 24:
                    gt_label = torch.ones(1, dtype=torch.float32).cuda()
                else:
                    gt_label = torch.zeros(1, dtype=torch.float32).cuda()

                # a loss below 0.1 marks the whole 30-frame window as highlight
                if loss < 0.1:
                    ext_hv_frames[start:end] = 1.

                # binarize: low loss -> highlight (1), otherwise 0
                loss[loss < 0.1] = 1.
                loss[loss >= 0.1] = 0.
                # record whether this window's prediction matches the ground truth
                correct.append((loss == gt_label).item())

                # advance by 6 frames, so consecutive windows overlap
                start += 6
                end += 6

            accuracy += sum(correct) / len(correct) / total_len

            savelist.append([filename, ext_hv_frames])

        print("Accuracy:", round(accuracy, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, accuracy)

        return accuracy, savelist
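
The overlapping-window bookkeeping in test() above is easy to get wrong, so here
is a small self-contained numpy sketch of the same idea; the window size, stride,
and loss threshold are taken from the code above, while the function name is
hypothetical:

import numpy as np

def windows_to_frame_mask(window_scores, n_frames, win=30, stride=6, thresh=0.1):
    # mark every frame covered by a window whose score falls below `thresh`
    mask = np.zeros(n_frames)
    for i, score in enumerate(window_scores):
        start = i * stride
        if score < thresh:
            mask[start:start + win] = 1.
    return mask

# three overlapping windows; only the second one scores as highlight
print(windows_to_frame_mask([0.5, 0.05, 0.4], n_frames=48))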
Example #3

import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# CNN, GRU, vis_tool, and get_config are project-local modules from this repo

class Trainer(object):
    def __init__(self, config, h_loader, r_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # defensive cast
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the pre-trained CNN feature extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False
            print(l, p.requires_grad)

        # TODO: is setting requires_grad = False enough to freeze c2d?
        # (see the freezing sketch after this example)

        self.gru = GRU(self.c2d).cuda()

    def train(self):
        # create optimizers
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad,
                                self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()

        self.gru.train()

        criterion = nn.BCEWithLogitsLoss()

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = h[0]
                r_video = r[0]

                # move the highlight and raw clips to the GPU
                h_video = Variable(h_video.cuda())
                r_video = Variable(r_video.cuda())

                self.gru.zero_grad()

                predicted = self.gru(h_video)  # predicted snippet scores
                # highlight videos => target 1; dtype must match the model output
                target = torch.ones(len(predicted), dtype=torch.float32).cuda()
                # keep the loss attached to the graph so backward() reaches the GRU
                h_loss = criterion(predicted, target)  # compute loss

                h_loss.backward()
                opt.step()
                self.gru.zero_grad()  # clear gradients before the raw-video pass

                predicted = self.gru(r_video)  # predicted snippet scores
                # raw videos => target 0
                target = torch.zeros(len(predicted),
                                     dtype=torch.float32).cuda()
                r_loss = criterion(predicted, target)  # compute loss

                r_loss.backward()
                opt.step()

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append(total_loss.item())

                print(
                    '[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                    % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot(
                    'H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (h_loss.data).cpu().numpy())

                self.vis.plot(
                    'R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                torch.save(self.gru.state_dict(),
                           'chkpoint' + str(epoch + 1) + '.pth')
                print("checkpoint saved")
Example #4

import os
import time

import cv2
import numpy as np
import torch
import visdom
from torch.autograd import Variable

# CNN, GRU, and get_loader are project-local modules from this repo

class TestViewer():
    """
    test_video : test video 하나의 filename (각 파일명 맨 뒤에 ground true hv의 frame이 적혀있음)
    extracted_hv : test_video 랑 같은 제목, 다른 확장자(npy)를 가지는 filename. numpy array를 가지고 있으며 각 snippet(48fs)마다 0, 1값이 표시됨.
    예상되는 애들은 00000011111111111000뭐 이런식인데[얘는 구현함] 0000011100111111100111이렇게 되는 경우도 생각해보자!!
    """
    def __init__(self, test_video, extracted_hv, ckpt):

        self.test_video = test_video
        self.extracted_hv = extracted_hv
        self.ckpt = ckpt

        # read the test video frame by frame into test_raw as a numpy array
        cap = cv2.VideoCapture(self.test_video)
        frames = []
        while True:
            ret, frame = cap.read()
            if ret:
                b, g, r = cv2.split(frame)
                frame = cv2.merge([r, g, b])  # BGR -> RGB
                # HWC2CHW
                frame = frame.transpose(2, 0, 1)
                frames.append(frame)
            else:
                break
        cap.release()

        # stack frames into a (num_frames, 3, 270, 480) array
        test_raw = np.concatenate(frames)
        self.test_raw = test_raw.reshape(-1, 3, 270, 480)

    def show(self, item=-1):
        # item: -1 = all, 0 = raw video, 1 = true highlight, 2 = extracted highlight
        if item == -1:
            self.showrv()
            self.showthv()
            self.showehv()
        elif item == 0:
            self.showrv()
        elif item == 1:
            self.showthv()
        elif item == 2:
            self.showehv()
        else:
            pass

    def showrv(self):

        viz0 = visdom.Visdom(use_incoming_socket=False)

        for f in range(0, self.test_raw.shape[0]):
            viz0.image(
                self.test_raw[f, :, :, :],
                win="gt video",
                opts={'title': 'TEST_RAW'},
            )
            time.sleep(0.01)

    def showthv(self):
        viz1 = visdom.Visdom(use_incoming_socket=False)
        # index the ground-truth highlight frames in test_raw to display them
        filename = os.path.split(self.test_video)[-1]

        h_start = filename.index("(")
        h_end = filename.index(")")

        h_frames = filename[h_start + 1:h_end]
        # h_frames = "42, 120" or "nohv"

        if "," in h_frames:
            s, e = h_frames.split(',')
            h_start, h_end = int(s), int(e)

        else:
            h_start, h_end = 0, 0
        if h_start == h_end:
            # TODO: report "no highlight" in the visdom window here
            return

        for f in range(h_start, h_end):

            viz1.image(
                self.test_raw[f, :, :, :],
                win="gt1 video",
                opts={'title': 'TEST_TRUE_HV'},
            )
            time.sleep(0.01)
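
    # A hedged refactoring sketch (hypothetical helper, not in the original code):
    # the "(start,end)" parsing above could be factored out and reused, mapping
    # e.g. "testRV04(198,360).mp4" -> (198, 360) and a "(nohv)" name -> (0, 0).
    def _parse_hv_range(self, filename):
        h_frames = filename[filename.index("(") + 1:filename.index(")")]
        if "," in h_frames:
            s, e = h_frames.split(',')
            return int(s), int(e)
        return 0, 0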

    def showehv(self):
        viz2 = visdom.Visdom(use_incoming_socket=False)
        # index the extracted highlight frames in test_raw to display them
        ext = np.load(self.extracted_hv)
        ext_idx = np.asarray(ext.nonzero()).ravel()  # always 1-D, even for one hit

        if ext_idx.size == 0:
            e_start, e_end = 0, 0
        else:
            print(ext_idx[0], ext_idx[-1])
            # map snippet indices back to frames: stride 6, snippet length 48,
            # e.g. snippet indices 7 and 12 -> frame range (42, 120)
            e_start = ext_idx[0] * 6
            e_end = ext_idx[-1] * 6 + 48

        if e_start == e_end:
            # TODO: report "no highlight" in the visdom window here
            return

        for f in range(e_start, e_end):

            viz2.image(
                self.test_raw[f, :, :, :],
                win="gt2 video",
                opts={'title': 'TEST_Extracted_HV'},
            )
            time.sleep(0.01)

    def get_accuracy(self):
        # load dataloader
        _, _, t_l = get_loader('../Dataset/HV', '../Dataset/RV',
                               '../Dataset/testRV', 1)

        # build network
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the CNN, exactly as during training
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(torch.load(self.ckpt))

        print(self.gru)

        self.gru.eval()
        avg_acc = 0

        for idx, (video, label) in enumerate(t_l):
            acc = 0.

            # forwarding
            test_video = Variable(video).cuda()
            predicted = self.gru(test_video)
            predicted = predicted.cpu().numpy()

            print('Predicted output:',
                  predicted)  # [forwarding score ....., backwarding score]
            print('Predicted output length:', len(predicted))
            print('Actual label:', label)
            print('Actual label length:', len(label))
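
A minimal usage sketch for TestViewer; the .mp4 path below is an assumption (only
the "(start,end)" naming convention is given above), while the npy and checkpoint
names follow the files saved by the trainers in the earlier examples:

# hypothetical driver (paths are illustrative)
viewer = TestViewer('../Dataset/testRV/testRV04(198,360).mp4',
                    './samples/testRV04(198,360).npy',
                    './samples/lr_0.0010_chkpoint1.pth')
viewer.show()          # raw video, then true highlight, then extracted highlight
viewer.get_accuracy()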