import torch
from torch.autograd import Variable

from dataloader import get_loader
from cnn_extractor import CNN
from vis_tool import Visualizer

model = CNN().cuda()
model.load_state_dict(torch.load('cnn.pkl'))  # load the pre-trained CNN extractor
print(model)

_, _, test_loader = get_loader('../Dataset/HV', '../Dataset/RV', '../Dataset/testRV')

test_avg_acc = 0.
test_cnt = 0
savelist = []

vis = Visualizer()

for idx, (video, label, filename) in enumerate(test_loader):
    video = video[0]
    label = label[0]
    filename = filename[0]

    start = 0
    end = 20  # evaluate in 20-frame snippets
    out_acc = 0.
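    # The script is truncated at this point in the source. A minimal sketch of
    # how the windowed evaluation presumably continues, modeled on the
    # Trainer.test() methods below. Assumptions not confirmed by the original:
    # the CNN returns a per-frame highlight score, snippets are labeled by
    # majority vote, and a mean score below 0.5 marks a highlight.
    cnt = 0
    while end < video.shape[0]:
        scores = model(Variable(video[start:end]).cuda())       # per-frame scores
        gt = 1. if (label[start:end] == 1.).sum() > 10 else 0.  # majority vote over 20 frames
        pred = 1. if scores.mean().item() < 0.5 else 0.         # low score => highlight
        out_acc += float(pred == gt)
        cnt += 1
        start += 20
        end += 20
    if cnt > 0:
        test_avg_acc += out_acc / cnt
        test_cnt += 1
        print('%s accuracy: %.4f' % (filename, out_acc / cnt))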
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

import vis_tool
from cnn_extractor import CNN
# NOTE: the GRU and get_config import paths below are assumed from how they
# are used in this repo; adjust them to the actual module layout.
from gru import GRU
from config import get_config


class Trainer(object):
    def __init__(self, config, h_loader, r_loader, t_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.t_loader = t_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf

        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(torch.load('cnn.pkl'))  # load the pre-trained CNN extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False  # freeze the extractor; only the GRU is trained
        self.gru = GRU(self.c2d).cuda()

    def train(self):
        # create the optimizer over the trainable (GRU) parameters only
        cfig = get_config()
        opt = optim.RMSprop(filter(lambda p: p.requires_grad, self.gru.parameters()),
                            lr=self.lr,
                            weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()  # unused in this version; the GRU computes its own loss
        max_acc = 0.

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                self.gru.train()

                h_video = Variable(h).cuda()  # highlight video
                r_video = Variable(r).cuda()  # raw video (not used by this version)

                self.gru.zero_grad()
                h_loss = self.gru(h_video)  # the GRU returns its loss directly
                h_loss.backward()
                opt.step()

                step_end_time = time.time()
                epoch_loss.append(h_loss.data.cpu().numpy())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f'
                      % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                         step_end_time - start_time, h_loss))

                self.vis.plot('H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              h_loss.data.cpu().numpy())

            self.vis.plot('Avg loss plot with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                          % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                          np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.t_loader)
                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(self.gru.state_dict(),
                               './samples/lr_%.4f_chkpoint' % cfig.lr + str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0] + ".npy", f[1])
                    print(np.load("./samples/testRV04(198,360).npy"))  # debug: inspect one saved mask
                    print("checkpoint saved")

    def test(self, t_loader):
        self.gru.eval()
        accuracy = 0.
        savelist = []
        total_len = len(t_loader)

        for step, (tv, label, filename) in enumerate(t_loader):
            filename = filename[0].split(".")[0]
            label = label.squeeze()

            start = 0
            end = 24  # evaluate in non-overlapping 24-frame snippets
            correct = 0
            count = 0
            npy = np.zeros(tv.shape[1])  # per-frame 0/1 highlight mask

            while end < tv.shape[1]:
                t_video = Variable(tv[:, start:end, :, :, :]).cuda()
                predicted = self.gru(t_video)

                # majority vote: the snippet is a highlight if more than half
                # of its ground-truth frames are highlight frames
                gt_label = label[start:end]
                if len(gt_label[gt_label == 1.]) > 12:
                    gt_label = torch.ones(predicted.shape, dtype=torch.float32).cuda()
                else:
                    gt_label = torch.zeros(predicted.shape, dtype=torch.float32).cuda()

                # a low score marks the snippet as a highlight
                if predicted < 0.5:
                    npy[start:end] = 1.
                predicted[predicted < 0.5] = 1.
                predicted[predicted >= 0.5] = 0.

                correct += (predicted == gt_label).item()
                start += 24
                end += 24
                count += 1

            accuracy += (correct / count) / total_len
            savelist.append([filename, npy])

        print("Accuracy:", round(accuracy, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, accuracy)
        return accuracy, savelist
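# A minimal sketch of how this Trainer is presumably wired up. The entry
# point itself is not part of the original file; get_loader's path arguments
# follow their usage elsewhere in this repo.
if __name__ == '__main__':
    from dataloader import get_loader

    config = get_config()
    h_loader, r_loader, t_loader = get_loader('../Dataset/HV', '../Dataset/RV',
                                              '../Dataset/testRV')
    trainer = Trainer(config, h_loader, r_loader, t_loader)
    trainer.train()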
# Trainer variant: resumes from a saved GRU checkpoint, trains with Adam on
# highlight videos only, and tests with an overlapping 30-frame window.
# (Same imports as the Trainer above.)
class Trainer(object):
    def __init__(self, config, h_loader, r_loader, t_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.t_loader = t_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf

        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(torch.load('cnn.pkl'))  # load the pre-trained CNN extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False  # freeze the extractor
        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(
            torch.load('./samples/lr_0.0010_chkpoint1.pth'))  # resume from a GRU checkpoint

    def train(self):
        # create the optimizer over the trainable (GRU) parameters only
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad, self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()
        max_acc = 0.

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, h in enumerate(self.h_loader):
                self.gru.train()  # back to train mode after testing

                h_video = Variable(h).cuda()  # highlight video

                self.gru.zero_grad()
                h_loss = self.gru(h_video)
                h_loss.backward()
                opt.step()

                step_end_time = time.time()
                epoch_loss.append(h_loss.data.cpu().numpy())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f'
                      % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                         step_end_time - start_time, h_loss))

                self.vis.plot('H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              h_loss.data.cpu().numpy())

            self.vis.plot('Avg loss plot with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                          % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                          np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.t_loader)
                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(self.gru.state_dict(),
                               './samples/lr_%.4f_chkpoint' % cfig.lr + str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0] + ".npy", f[1])
                    print("checkpoint saved")

    def test(self, t_loader):
        self.gru.eval()  # test mode
        accuracy = 0.
        savelist = []
        total_len = len(t_loader)  # number of test videos

        for step, (tv, label, filename) in enumerate(t_loader):
            filename = filename[0].split(".")[0]
            label = label.squeeze()  # per-frame ground-truth highlight labels

            start = 0
            end = 30  # slide a 30-frame window over the video
            correct = []
            ext_hv_frames = np.zeros(tv.shape[1])  # one 0/1 flag per frame

            while end < tv.shape[1]:
                t_video = Variable(tv[:, start:end, :, :, :]).cuda()
                loss = self.gru(t_video)  # scalar loss for this window

                # a 30-frame window counts as a highlight if more than 24 of
                # its ground-truth frames are highlight frames
                gt_label = label[start:end]
                if len(gt_label[gt_label == 1.]) > 24:
                    gt_label = torch.ones(1, dtype=torch.float32).cuda()
                else:
                    gt_label = torch.zeros(1, dtype=torch.float32).cuda()

                # a loss below 0.1 marks the whole window as extracted highlight
                if loss < 0.1:
                    ext_hv_frames[start:end] = 1.
                loss[loss < 0.1] = 1.
                loss[loss >= 0.1] = 0.

                correct.append((loss == gt_label).item())  # was this window classified correctly?

                start += 6  # windows overlap with a stride of 6 frames
                end += 6

            accuracy += sum(correct) / len(correct) / total_len
            savelist.append([filename, ext_hv_frames])

        print("Accuracy:", round(accuracy, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, accuracy)
        return accuracy, savelist
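# The saved .npy masks are per-frame 0/1 flags, and as the TestViewer
# docstring below notes, a mask can come out discontiguous (e.g.
# 0000011100111111100111). A small illustrative helper (not part of the
# original code) for turning such a mask into (start, end) frame intervals:
def mask_to_intervals(mask):
    intervals = []
    start = None
    for i, v in enumerate(mask):
        if v == 1. and start is None:
            start = i                     # an interval opens
        elif v != 1. and start is not None:
            intervals.append((start, i))  # an interval closes (end is exclusive)
            start = None
    if start is not None:
        intervals.append((start, len(mask)))
    return intervals

# e.g. mask_to_intervals(np.array([0., 1., 1., 0., 1.])) -> [(1, 3), (4, 5)]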
# Trainer variant: trains the GRU as a binary classifier with
# BCEWithLogitsLoss, using highlight videos as positives (target 1) and raw
# videos as negatives (target 0). (Same imports as the Trainer above.)
class Trainer(object):
    def __init__(self, config, h_loader, r_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf

        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(torch.load('cnn.pkl'))  # load the pre-trained CNN extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False  # freeze the extractor so it has no trainable parameters
            print(l, p.requires_grad)
        self.gru = GRU(self.c2d).cuda()

    def train(self):
        # create the optimizer over the trainable (GRU) parameters only
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad, self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()
        self.gru.train()
        criterion = nn.BCEWithLogitsLoss()

        for epoch in range(self.n_epochs):
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = Variable(h[0].cuda())  # highlight video
                r_video = Variable(r[0].cuda())  # raw video

                # highlight videos => target 1
                self.gru.zero_grad()
                predicted = self.gru(h_video)  # predicted snippet scores (logits)
                target = torch.ones(len(predicted), dtype=torch.float32).cuda()
                h_loss = criterion(predicted, target)
                h_loss.backward()
                opt.step()

                # raw videos => target 0
                self.gru.zero_grad()
                predicted = self.gru(r_video)
                target = torch.zeros(len(predicted), dtype=torch.float32).cuda()
                r_loss = criterion(predicted, target)
                r_loss.backward()
                opt.step()

                step_end_time = time.time()
                total_loss = r_loss + h_loss
                epoch_loss.append(total_loss.data.cpu().numpy())

                print('[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                      % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                         step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot('H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              h_loss.data.cpu().numpy())
                self.vis.plot('R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                              % (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                              r_loss.data.cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                torch.save(self.gru.state_dict(), 'chkpoint' + str(epoch + 1) + '.pth')
                print("checkpoint saved")
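# A pitfall worth noting when wiring losses like the above: wrapping a
# computed loss in a fresh Variable(..., requires_grad=True) detaches it from
# the autograd graph, so backward() never reaches the model's parameters and
# training silently does nothing. A minimal, self-contained repro sketch:
import torch
from torch.autograd import Variable

w = Variable(torch.ones(1), requires_grad=True)
loss = (w * 2).sum()

detached = Variable(loss.data, requires_grad=True)  # fresh leaf, no history
detached.backward()
print(w.grad)  # None: the gradient never reached w

loss.backward()
print(w.grad)  # tensor([2.]): the connected loss does update w.grad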
import os
import time

import cv2
import numpy as np
import torch
import visdom
from torch.autograd import Variable

from dataloader import get_loader
from cnn_extractor import CNN
from gru import GRU


class TestViewer():
    """
    test_video : filename of one test video (each filename carries the
                 ground-truth highlight frame range at the end).
    extracted_hv : filename with the same name as test_video but the .npy
                 extension. It holds a numpy array with a 0/1 value per snippet
                 (48 frames). The expected shape is something like
                 00000011111111111000 (handled here), but discontiguous masks
                 like 0000011100111111100111 can also occur and still need
                 thought.
    """

    def __init__(self, test_video, extracted_hv, ckpt):
        self.test_video = test_video
        self.extracted_hv = extracted_hv
        self.ckpt = ckpt

        # read the test video frame by frame into a numpy array (test_raw)
        cap = cv2.VideoCapture(self.test_video)
        frames = []
        while True:
            ret, frame = cap.read()
            if ret:
                b, g, r = cv2.split(frame)
                frame = cv2.merge([r, g, b])      # BGR -> RGB
                frame = frame.transpose(2, 0, 1)  # HWC -> CHW
                frames.append(frame)
            else:
                break
        cap.release()

        test_raw = np.concatenate(frames)
        self.test_raw = test_raw.reshape(-1, 3, 270, 480)

    def show(self, item=-1):
        if item == -1:
            self.showrv()
            self.showthv()
            self.showehv()
        elif item == 0:
            self.showrv()
        elif item == 1:
            self.showthv()
        elif item == 2:
            self.showehv()
        else:
            pass

    def showrv(self):
        viz0 = visdom.Visdom(use_incoming_socket=False)
        for f in range(0, self.test_raw.shape[0]):
            viz0.image(
                self.test_raw[f, :, :, :],
                win="gt video",
                opts={'title': 'TEST_RAW'},
            )
            time.sleep(0.01)

    def showthv(self):
        viz1 = visdom.Visdom(use_incoming_socket=False)

        # index the ground-truth highlight frames out of test_raw, using the
        # frame range embedded in the filename
        filename = os.path.split(self.test_video)[-1]
        h_start = filename.index("(")
        h_end = filename.index(")")
        h_frames = filename[h_start + 1:h_end]  # h_frames = "42, 120" or "nohv"

        if "," in h_frames:
            s, e = h_frames.split(',')
            h_start, h_end = int(s), int(e)
        else:
            h_start, h_end = 0, 0  # TODO: report "no highlight" in visdom

        for f in range(h_start, h_end):
            viz1.image(
                self.test_raw[f, :, :, :],
                win="gt1 video",
                opts={'title': 'TEST_TRUE_HV'},
            )
            time.sleep(0.01)

    def showehv(self):
        viz2 = visdom.Visdom(use_incoming_socket=False)

        # index the extracted highlight frames out of test_raw, using the
        # per-snippet 0/1 mask saved during testing
        ext = np.load(self.extracted_hv)
        ext_idx = np.asarray(ext.nonzero()).squeeze()

        if ext_idx.size == 0:
            e_start, e_end = 0, 0  # TODO: report "no highlight" in visdom
        else:
            print(ext_idx[0], ext_idx[-1])
            e_start = ext_idx[0] * 6      # snippet index -> first frame (stride 6)
            e_end = ext_idx[-1] * 6 + 48  # last snippet covers 48 frames

        for f in range(e_start, e_end):
            viz2.image(
                self.test_raw[f, :, :, :],
                win="gt2 video",
                opts={'title': 'TEST_Extracted_HV'},
            )
            time.sleep(0.01)

    def get_accuracy(self):
        # load dataloader
        _, _, t_l = get_loader('../Dataset/HV', '../Dataset/RV',
                               '../Dataset/testRV', 1)

        # build the network and restore the checkpoint
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(torch.load('cnn.pkl'))  # load the pre-trained CNN extractor
        for l, p in self.c2d.named_parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(torch.load(self.ckpt))
        print(self.gru)

        self.gru.eval()
        avg_acc = 0.

        for idx, (video, label) in enumerate(t_l):
            acc = 0.

            # forwarding
            test_video = Variable(video).cuda()
            predicted = self.gru(test_video)
            predicted = predicted.data.cpu().numpy()

            print('Predicted output:', predicted)  # [forward scores ..., backward scores]
            print('Predicted output length:', len(predicted))
            print('Actual label:', label)
            print('Actual label length:', len(label))
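            # The source file is truncated here. A sketch of how the per-video
            # accuracy tally might conclude, assuming one score per snippet,
            # the low-score-means-highlight convention from Trainer.test above,
            # and that the label sequence aligns with the scores (all of these
            # are assumptions, not confirmed by the original):
            pred_labels = (predicted < 0.5).astype(np.float32)
            gt = label.squeeze().numpy()[:len(pred_labels)]
            acc = float((pred_labels == gt).mean())
            avg_acc += acc / len(t_l)
            print('Accuracy: %.4f' % acc)

        print('Average accuracy: %.4f' % avg_acc)
        return avg_acc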