def predict(clip, model):
    """Classify a clip of frames with `model`.

    Args:
        clip: iterable of PIL images (one per frame).
        model: network taking a (1, T, C, H, W)-stacked clip tensor.

    Returns:
        Tensor of top-1 class indices whose softmax score exceeds 0.6
        (empty when the prediction is not confident enough).

    Relies on the module-level `opt` namespace for normalization options.
    """
    # Pick the normalization matching the training-time options.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    spatial_transform = Compose([
        Scale((150, 150)),
        ToTensor(opt.norm_value), norm_method
    ])
    # The transform was just constructed, so no None-check is needed.
    clip = [spatial_transform(img) for img in clip]
    clip = torch.stack(clip, dim=0)
    clip = clip.unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        outputs = model(clip)
        # Explicit dim: implicit-dim softmax is deprecated and ambiguous
        # for >1-D inputs.
        outputs = F.softmax(outputs, dim=1)
    scores, idx = torch.topk(outputs, k=1)
    mask = scores > 0.6  # confidence threshold
    preds = idx[mask]
    return preds
def pre_process_frame(frame, opt):
    """Prepare one OpenCV frame for both the detector and the classifier.

    Args:
        frame: BGR image as produced by OpenCV.
        opt: options namespace carrying normalization / size settings.

    Returns:
        (det_frame, clf_frame) tensor pair built from the same RGB image.
    """
    # OpenCV delivers BGR; convert to RGB and wrap as a PIL image so the
    # transform pipelines below can operate on it.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_frame = Image.fromarray(rgb)

    # Choose the normalization matching the training-time options.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    det_pipeline = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    # Use torchvision transforms for compatibility with the SSAR model.
    clf_pipeline = transforms.Compose([
        transforms.Resize(opt.sample_size_clf),
        transforms.ToTensor(),
        transforms.Normalize(opt.mean_clf, opt.std_clf),
    ])
    return det_pipeline(pil_frame), clf_pipeline(pil_frame)
def __init__(self, model_file, sample_duration, model_type, cuda_id=0):
    """Build and load a C3D/ResNet action-recognition model in eval mode.

    Args:
        model_file: checkpoint filename, relative to the data root.
        sample_duration: number of frames per input clip.
        model_type: backbone name (combined with depth into opt.arch).
        cuda_id: GPU index stored on the options namespace.
    """
    # Start from the CLI defaults, then override with fixed settings.
    self.opt = parse_opts()
    self.opt.model = model_type
    self.opt.root_path = './C3D_ResNet/data'
    self.opt.resume_path = os.path.join(self.opt.root_path, model_file)
    self.opt.pretrain_path = os.path.join(self.opt.root_path, 'models/resnet-18-kinetics.pth')
    self.opt.cuda_id = cuda_id
    self.opt.dataset = 'ucf101'
    self.opt.n_classes = 400  # pretraining label space
    self.opt.n_finetune_classes = 3  # target label space after fine-tuning
    self.opt.ft_begin_index = 4
    self.opt.model_depth = 18
    self.opt.resnet_shortcut = 'A'
    self.opt.sample_duration = sample_duration
    self.opt.batch_size = 1
    self.opt.n_threads = 1
    self.opt.checkpoint = 5
    self.opt.arch = '{}-{}'.format(self.opt.model, self.opt.model_depth)
    self.opt.mean = get_mean(self.opt.norm_value, dataset=self.opt.mean_dataset)
    self.opt.std = get_std(self.opt.norm_value)
    # print(self.opt)
    print('Loading C3D action-recognition model..')
    self.model, parameters = generate_model(self.opt)
    # print(self.model)
    # Select normalization consistent with the training-time options.
    if self.opt.no_mean_norm and not self.opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not self.opt.std_norm:
        norm_method = Normalize(self.opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(self.opt.mean, self.opt.std)
    # Restore fine-tuned weights from the checkpoint.
    if self.opt.resume_path:
        print(' loading checkpoint {}'.format(self.opt.resume_path))
        checkpoint = torch.load(self.opt.resume_path)
        # assert self.opt.arch == checkpoint['arch']
        self.opt.begin_epoch = checkpoint['epoch']
        self.model.load_state_dict(checkpoint['state_dict'])
    self.spatial_transform = Compose([
        ScaleQC(int(self.opt.sample_size / self.opt.scale_in_test)),
        CornerCrop(self.opt.sample_size, self.opt.crop_position_in_test),
        ToTensor(self.opt.norm_value), norm_method
    ])
    self.target_transform = ClassLabel()
    self.model.eval()  # inference only
def model_process(count, model):
    """Run the test pipeline for `model`, dumping options to result_path.

    Args:
        count: opaque identifier forwarded to tester.test.
        model: network under evaluation.
    """
    opt = parse_opts()
    # Make all configured paths absolute under the root path.
    if opt.root_path != '':
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # Geometric progression of scales starting at initial_scale.
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    #opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    #print(opt)
    #print(opt.result_path)
    # Persist the effective options next to the results for reproducibility.
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    #print(model)
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    # Select normalization consistent with the training-time options.
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    print('testing is run')
    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        tester.test(count, test_loader, model, opt, test_data.class_names)
def get_normalize_method(mean, std, no_mean_norm, no_std_norm):
    """Return a Normalize transform, neutralizing mean and/or std on demand.

    A disabled mean becomes [0, 0, 0]; a disabled std becomes [1, 1, 1],
    i.e. the corresponding step is a no-op.
    """
    effective_mean = [0, 0, 0] if no_mean_norm else mean
    effective_std = [1, 1, 1] if no_std_norm else std
    return Normalize(effective_mean, effective_std)
def get_loaders(opt): """ Make dataloaders for train and validation sets """ # train loader norm_method = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale((opt.sample_size, opt.sample_size)), Resize(256), CenterCrop(224), ToTensor(), norm_method ]) temporal_transform = TemporalRandomCrop(25) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True) # validation loader target_transform = ClassLabel() temporal_transform = LoopPadding(25) validation_data = get_validation_set(opt, spatial_transform, temporal_transform, target_transform) val_loader = torch.utils.data.DataLoader(validation_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True) return train_loader, val_loader
def get_dataloader(opt):
    """Build the test DataLoader for the surgical-phase dataset.

    Uses 112x112 center crops of 16-frame clips with mean-only
    normalization (std left at 1).
    """
    # Per-channel mean, originally expressed in the 0-255 range.
    pixel_mean = [110.63666788 / 255, 103.16065604 / 255, 96.29023126 / 255]
    pixel_std = [1, 1, 1]
    spatial_transform = Compose([
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(255),
        Normalize(pixel_mean, pixel_std),
    ])
    dataset = SurgicalDataset(
        os.path.abspath(opt.frames_path),
        os.path.abspath(opt.video_phase_annotation_path),
        opt.class_names,
        spatial_transform=spatial_transform,
        temporal_transform=LoopPadding(16),
        target_transform=ClassLabel(),
        sample_duration=16)
    return torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)
def __init__(self, root_dir, spatial_transform=None, seqLen=20, train=True,
             mulSeg=False, numSeg=1, fmt='.png', regression=True,
             numOrdClass=12):
    """Dataset of RGB frames plus motion maps split from `root_dir`.

    Args:
        root_dir: dataset root passed to gen_split.
        spatial_transform: augmentation applied before tensor conversion.
        seqLen: frames per sequence.
        train: whether to load the training split.
        mulSeg / numSeg: multi-segment sampling flags.
        fmt: frame file extension.
        regression: when False, motion maps are binarized for classification.
        numOrdClass: number of ordinal classes.
    """
    # ImageNet normalization statistics.
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    self.images, self.labels, self.numFrames = gen_split(
        root_dir, 5, train)  # see above (original comment: "vedi sopra")
    # Data-augmentation transform shared by both pipelines below.
    self.main_spatial_transform = spatial_transform
    self.spatial_transform_rgb = Compose(
        [self.main_spatial_transform, ToTensor(), normalize])
    if regression == False:
        # Classification mode: motion maps downscaled to 7x7 and
        # binarized at threshold 0.4.
        self.spatial_transform_mmaps = Compose([
            self.main_spatial_transform, Scale(7), ToTensor(), Binary(0.4)
        ])
    else:
        # Regression mode: keep continuous motion-map values.
        self.spatial_transform_mmaps = Compose(
            [self.main_spatial_transform, Scale(7), ToTensor()])
    self.train = train
    self.mulSeg = mulSeg
    self.numSeg = numSeg
    self.seqLen = seqLen
    self.fmt = fmt
    self.numOrdClass = numOrdClass
def classify_video(video_dir, video_name, class_names, model, opt):
    """Run `model` over every clip of the video in `video_dir`.

    Returns the concatenated per-clip outputs as a numpy array, or None
    when the video yields no clips.
    """
    # print("video_dir: {}, video_name: {}".format(video_dir, video_name))
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference idiom (gone since PyTorch 0.4).
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
    if len(video_outputs) != 0:
        video_outputs = torch.cat(video_outputs)
        return video_outputs.numpy()
    else:
        return None
def extract_feature(opt, video_dir, C3D_model):
    """Extract C3D features for every clip of one video.

    Returns the concatenated per-clip outputs as a numpy array.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    load_image_fn = None  # this Video dataset loads frames itself
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    c3d_features = []
    for i, clip in enumerate(data_loader):
        print(clip.mean())  # debug: sanity-check input statistics
        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        # aggregate per-batch outputs (original comment: 汇总)
        c3d_features.append(c3d_outputs.cpu().data)  # torch.Size([8, 512, 14, 14])
    c3d_features = torch.cat(c3d_features, 0)  # c3d feature of one video
    return c3d_features.cpu().numpy()
def get_loaders(opt): """ Make dataloaders for train and validation sets """ # train loader opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset) if opt.no_mean_norm and not opt.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not opt.std_norm: norm_method = Normalize(opt.mean, [1, 1, 1]) else: norm_method = Normalize(opt.mean, opt.std) spatial_transform = Compose([ # crop_method, Scale((opt.sample_size, opt.sample_size)), # RandomHorizontalFlip(), ToTensor(opt.norm_value), norm_method ]) temporal_transform = TemporalRandomCrop(16) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader( training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True) # validation loader spatial_transform = Compose([ Scale((opt.sample_size, opt.sample_size)), # CenterCrop(opt.sample_size), ToTensor(opt.norm_value), norm_method ]) target_transform = ClassLabel() temporal_transform = LoopPadding(16) validation_data = get_validation_set( opt, spatial_transform, temporal_transform, target_transform) val_loader = torch.utils.data.DataLoader( validation_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True) return train_loader, val_loader
def classify_video(video_dir, video_name, class_names, model, opt,
                   annotation_digit=5):
    """Classify or featurize every clip of one video.

    Returns {'video': name, 'clips': [...]} where each clip entry carries
    its segment, label+scores ('score' mode) or features ('feature' mode),
    and the supplied ground-truth annotation digit.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    print('reading file from: ', video_dir, 'file name: ', video_name)
    video_outputs = []
    video_segments = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # idiom; an unused duplicate enumerate() of the loader was dropped.
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        # Key spelling ('annotaion') preserved for downstream consumers.
        clip_results['ground_truth_annotaion'] = annotation_digit
        results['clips'].append(clip_results)
    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    """Classify or featurize every clip of one video.

    Returns {'video': name, 'clips': [...]}; 'clips' stays empty when the
    video yields no data.
    """
    assert opt.mode in ['score', 'feature']
    print('video_name, class_names', video_name)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    print('Running on video', video_dir)
    #print ('Data loader size', len(data_loader))
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference idiom (gone since PyTorch 0.4).
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            print(i, inputs.size(), segments.shape)
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    results = {'video': video_name, 'clips': []}
    if len(video_outputs) > 0:
        print('Video outputs and segments: ', video_outputs[0].shape)
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
        _, max_indices = video_outputs.max(dim=1)
        print('Video outputs', video_outputs.size())
        for i in range(video_outputs.size(0)):
            clip_results = {
                'segment': video_segments[i].tolist(),
            }
            if opt.mode == 'score':
                clip_results['label'] = class_names[max_indices[i]]
                clip_results['scores'] = video_outputs[i].tolist()
            elif opt.mode == 'feature':
                clip_results['features'] = video_outputs[i].tolist()
            results['clips'].append(clip_results)
    return results
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize,
             stackSize):
    """Evaluate a saved attention model on the chosen egocentric dataset.

    Args:
        dataset: one of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'
            (selects the class count).
        model_state_dict: path to the saved state dict.
        dataset_dir: root directory of the test split.
        seqLen: frames per sequence.
        memSize: ConvLSTM memory size.
        stackSize: optical-flow stack size.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    # ImageNet normalization statistics (normalize is built but unused here;
    # presumably the dataset applies it internally — TODO confirm).
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224)])
    spatial_transorm2 = Compose([Scale((7, 7)), ToTensor()])
    sequence = True
    vid_seq_test = makeDataset(dataset_dir, spatial_transorm2,
                               spatial_transform=spatial_transform,
                               stackSize=stackSize, fmt='.png', phase='Test',
                               seqLen=seqLen)
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2,
                                              pin_memory=True)
    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    # Freeze all weights: evaluation only.
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        #for j, (inputs, targets) in enumerate(test_loader):
        for flowX, flowY, inputs, targets in test_loader:
            # Model expects (seq, batch, C, H, W): move time to dim 0.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            output_label, _, flowXprediction, flowYprediction = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())
    # true_divide keeps the ratio float even for integer operands.
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy)
def classify_video(video_dir, video_name, class_names, model, opt):
    """Classify or featurize every clip of one video.

    Returns {'video': name, 'clips': [...]}; on an empty/unreadable video
    the name is appended to error.list and {} is returned.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 stride=opt.stride)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference idiom (gone since PyTorch 0.4).
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    if len(video_outputs) == 0:
        # Record videos that produced no clips for later inspection.
        with open("error.list", 'a') as fout:
            fout.write("{}\n".format(video_name))
        return {}
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {
        'video': video_name,
        'clips': []
    }
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        results['clips'].append(clip_results)
    return results
def extract_feature(opt, video_dir, C3D_model, load_image_fn, C2D_model,
                    c2d_shape, duration):
    """Extract both C3D (clip) and C2D (frame) features for one video.

    Returns (c3d_features, c2d_features) as numpy arrays, or (None, None)
    when the video yields no clips (the torch.cat calls fail on empty lists).
    """
    assert opt.mode in ['score', 'feature']
    C, H, W = c2d_shape
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    # At least one segment even for very short videos.
    opt.num_segments = max(int(duration / opt.clip_len), 1)
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)
    c3d_features = []
    c2d_features = []
    for i, (clip, frames_npy_data) in enumerate(data_loader):
        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        frames = frames_npy_data.to(opt.device)
        with torch.no_grad():
            c2d_outputs = C2D_model(frames).squeeze()
        # squeeze() may drop the batch dimension for a single frame;
        # restore it so concatenation stays rank-consistent.
        if len(c2d_outputs.shape) == 1:
            c2d_outputs = c2d_outputs.unsqueeze(0)
        # aggregate per-batch outputs (original comment: 汇总)
        c3d_features.append(c3d_outputs.cpu().data)
        c2d_features.append(c2d_outputs.cpu().data)
    try:
        c3d_features = torch.cat(c3d_features)  # c3d feature of one video
        c2d_features = torch.cat(c2d_features)  # c3d feature of one video
    except:
        # Empty feature lists (no clips) make torch.cat raise.
        return None, None
    return c3d_features.cpu().numpy(), c2d_features.cpu().numpy()
def get_cam_visualisation(self, resnet, weight_softmax, input_pil_image,
                          preprocess_for_viz=None, preprocess_for_model=None):
    """Render a class-activation-map (CAM) heatmap over the input image.

    Args:
        resnet: backbone returning (logits, conv feature map, _).
        weight_softmax: classifier weights used to project the feature map.
        input_pil_image: source PIL image.
        preprocess_for_viz: transform for the displayed image
            (defaults to Scale(256)+CenterCrop(224)).
        preprocess_for_model: transform for the model input
            (defaults to the same crop plus ImageNet normalization).

    Returns:
        PIL image of the heatmap blended 40/60 with the input.
    """
    if preprocess_for_viz == None:
        preprocess_for_viz = Compose([
            Scale(256),
            CenterCrop(224),
        ])
    if preprocess_for_model == None:
        # ImageNet normalization statistics.
        normalize = Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        preprocess_for_model = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
    tensor_image = preprocess_for_model(input_pil_image)
    pil_image = preprocess_for_viz(input_pil_image)
    logit, feature_conv, _ = resnet(tensor_image.unsqueeze(0).cuda())
    bz, nc, h, w = feature_conv.size()
    # Flatten spatial dims so the CAM is a weighted sum over channels.
    feature_conv = feature_conv.view(bz, nc, h * w)
    h_x = F.softmax(logit, dim=1).data
    probs, idx = h_x.sort(1, True)  # idx[:, 0] is the top-1 class
    # Project the feature map with the top class's classifier weights.
    cam_img = torch.bmm(weight_softmax[idx[:, 0]].unsqueeze(1),
                        feature_conv).squeeze(1)
    cam_img = F.softmax(cam_img, 1).data
    cam_img = cam_img.cpu()
    cam_img = cam_img.reshape(h, w)
    # Min-max normalize to [0, 1] before converting to 8-bit.
    cam_img = cam_img - torch.min(cam_img)
    cam_img = cam_img / torch.max(cam_img)
    cam_img = np.uint8(255 * cam_img)
    img = np.uint8(pil_image)
    # Upscale the low-res CAM to the display image size.
    output_cam = cv2.resize(cam_img, pil_image.size)
    heatmap = cv2.applyColorMap(output_cam, cv2.COLORMAP_JET)
    # applyColorMap works in BGR; convert, blend, convert back.
    img = cv2.cvtColor(np.uint8(img), cv2.COLOR_RGB2BGR)
    result = heatmap * 0.4 + img * 0.6
    result = cv2.cvtColor(np.uint8(result), cv2.COLOR_BGR2RGB)
    return Image.fromarray(result)
def classify_video(video_dir, video_name, class_names, model, opt):
    """Run the model over every clip and dump per-clip output vectors to
    text files under features/<video stem>/.

    Returns a results skeleton {'video': name, 'clips': []} — the feature
    vectors themselves are written to disk, not returned.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    # makedirs(..., exist_ok=True) instead of os.mkdir: does not crash when
    # the video was processed before, and also creates the parent
    # 'features' directory on the first run.
    dir_path = 'features/' + video_name.split('.')[0]
    os.makedirs(dir_path, exist_ok=True)
    mypath = dir_path + '/'
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        with open(mypath + str(i) + '.txt', 'w+') as f:
            f.write(' '.join(map(str, video_outputs[i].tolist())))
    return results
def extract_feats(file_path, net, filenames, frame_num, batch_size, save_path):
    """Extract 3D features (saved in .npy) for a video.

    For each video in filenames[start_idx:end_idx] (indices taken from the
    enclosing module scope), samples `frame_num` frame indices, feeds
    16-frame clips around each index through `net` in batches, and saves
    the concatenated features to save_path/<name>.npy. Already-extracted
    videos are skipped.
    """
    net.eval()
    mean = get_mean(255, dataset='kinetics')
    std = get_std(255)
    transform = Compose([
        trn.ToPILImage(),
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(),
        Normalize(mean, std)
    ])
    print("Network loaded")
    # Read videos and extract features in batches
    for file in filenames[start_idx:end_idx]:
        feat_file = os.path.join(save_path, file[:-4] + '.npy')
        if os.path.exists(feat_file):
            continue  # already extracted
        vid = imageio.get_reader(os.path.join(file_path, file), 'ffmpeg')
        curr_frames = []
        for frame in vid:
            if len(frame.shape) < 3:
                # Grayscale frame: tile the single channel into H x W x 3.
                # (The previous np.repeat(frame, 3) flattened the array to
                # 1-D, which breaks ToPILImage.)
                frame = np.repeat(frame[:, :, np.newaxis], 3, axis=2)
            curr_frames.append(transform(frame).unsqueeze(0))
        curr_frames = torch.cat(curr_frames, dim=0)
        print("Shape of frames: {0}".format(curr_frames.shape))
        # Evenly sample frame_num indices across the video.
        idx = np.linspace(0, len(curr_frames) - 1, frame_num).astype(int)
        print("Captured {} clips: {}".format(len(idx), curr_frames.shape))
        curr_feats = []
        for i in range(0, len(idx), batch_size):
            # 16-frame clip (8 before, 8 after) around each sampled index.
            curr_batch = [
                curr_frames[x - 8:x + 8, ...].unsqueeze(0)
                for x in idx[i:i + batch_size]
            ]
            curr_batch = torch.cat(curr_batch, dim=0).cuda()
            # (B, T, C, H, W) -> (B, C, T, H, W) expected by the 3D conv net.
            out = net(curr_batch.transpose(1, 2).cuda())
            curr_feats.append(out.detach().cpu())
            print("Appended {} features {}".format(i + 1, out.shape))
        curr_feats = torch.cat(curr_feats, 0)
        del out  # release the last GPU batch before saving
        #set_trace()
        np.save(feat_file, curr_feats.numpy())
        print("Saved file {}\nExiting".format(file[:-4] + '.npy'))
def classify_video(video_dir, video_name, model, opt):
    """Extract per-clip feature vectors for one video.

    Returns (video_name, clips) where each clip entry carries its segment
    and the raw model output as 'features'.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference idiom (gone since PyTorch 0.4).
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    clips = []
    for i in range(video_outputs.size(0)):
        clips.append({
            'segment': video_segments[i].tolist(),
            'features': video_outputs[i].tolist(),
        })
    return video_name, clips
def classify_video(video_dir, video_name, model, opt):
    """Collect model outputs for every clip of one video.

    Returns a dict with the video name, the concatenated output tensor
    under 'features', and the concatenated segment tensor under 'clips'.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=False)
    video_outputs = []
    video_segments = []
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            inputs = Variable(inputs)
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    # Guard against empty videos: torch.cat raises on an empty list.
    # NOTE(review): when no clips were produced, 'features'/'clips' end up
    # as empty lists rather than tensors — confirm callers handle that.
    if video_outputs:
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
    results = dict()
    results['video'] = video_name
    results['features'] = video_outputs
    results['clips'] = video_segments
    return results
def eval(model):
    """Evaluate `model` on the RWF-2000 validation split.

    NOTE(review): this function shadows the builtin `eval`, and the
    computed val_loss/val_acc are not returned or stored — confirm the
    results are consumed elsewhere (e.g. printed inside val()).
    """
    crop_method = GroupRandomScaleCenterCrop(size=(224, 224))
    # ImageNet normalization statistics.
    norm = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    spatial_transform = Compose(
        [crop_method, GroupRandomHorizontalFlip(), ToTensor(), norm])
    temporal_transform = RandomCrop(size=16, stride=1)  # 16-frame clips
    target_transform = Label()
    val_data = RWF2000('/content/RWF_2000/frames/',
                       '/content/Action_Recognition' + '/RWF-2000.json',
                       'validation', spatial_transform, temporal_transform,
                       target_transform, 'rwf-2000')
    # print(len(val_data))
    val_loader = DataLoader(val_data, batch_size=16, shuffle=False,
                            num_workers=4, pin_memory=True)
    criterion = nn.CrossEntropyLoss()
    val_loss, val_acc = val(val_loader, model, criterion)
def classify_video(video_dir, video_name, class_names, model, opt):
    """Extract per-clip feature vectors for one video.

    Returns a numpy array of shape (num_clips, feature_dim); only
    'feature' mode is supported.
    """
    assert opt.mode == 'feature'
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir, spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []  # collected but unused (concat below is disabled)
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            inputs = Variable(inputs)
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    # video_segments = torch.cat(video_segments)
    # Re-stack per-clip outputs into one (num_clips, feature_dim) array.
    results = []
    for i in range(video_outputs.size(0)):
        clip_results = np.expand_dims(video_outputs[i].numpy(), axis=0)
        results.append(clip_results)
    results = np.concatenate(results, axis=0)
    return results
def __init__(self, root_dir, spatial_transform=None, seqLen=20, train=True,
             mulSeg=False, numSeg=1, fmt='.png', phase='train',
             regressor=False):
    """Dataset of RGB frames plus attention maps split from `root_dir`.

    Args:
        root_dir: dataset root passed to gen_split.
        spatial_transform: augmentation applied before tensor conversion.
        seqLen: frames per sequence.
        train: whether this is the training split.
        mulSeg / numSeg: multi-segment sampling flags.
        fmt: frame file extension.
        phase: split name forwarded to gen_split.
        regressor: when False, maps are binarized for classification.
    """
    self.images, self.maps, self.labels, self.numFrames = gen_split(
        root_dir, 5, phase)
    # ImageNet normalization statistics.
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    # Shared augmentation applied by both pipelines below.
    self.spatial_transform0 = spatial_transform
    self.spatial_rgb = Compose(
        [self.spatial_transform0, ToTensor(), normalize])
    if not (regressor):
        # Classification mode: maps downscaled to 7x7 and binarized at 0.4.
        self.spatial_transform_map = Compose(
            [self.spatial_transform0, Scale(7), ToTensor(), Binary(0.4)])
    else:
        # Regression mode: keep continuous map values.
        self.spatial_transform_map = Compose(
            [self.spatial_transform0, Scale(7), ToTensor()])
    self.train = train
    self.mulSeg = mulSeg
    self.numSeg = numSeg
    self.seqLen = seqLen
    self.fmt = fmt
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):
    """Train the violence-recognition ConvLSTM model, evaluating periodically.

    Args:
        numEpochs: total training epochs.
        lr: initial RMSprop learning rate.
        stepSize, decayRate: StepLR schedule (multiply lr by decayRate
            every stepSize epochs).
        trainBatchSize: training mini-batch size.
        seqLen: frames sampled per video.
        memSize: ConvLSTM memory size.
        evalInterval: run evaluation every this many epochs.
        evalMode: 'centerCrop' | 'tenCrops' | 'fiveCrops' | 'horFlip'.
        numWorkers: DataLoader worker count (test loader uses half).
        outDir: suffix of the './experiments_<outDir>' output directory.
        fightsDir_*/noFightsDir_*: directories of fight / no-fight videos.

    Returns:
        True on completion.

    Fixes vs. original: ``targets.cuda(async=True)`` (SyntaxError on
    Python 3.7+) -> ``non_blocking=True``; ``loss.data[0]`` -> ``.item()``;
    integer-tensor accuracy division (truncated to 0) -> Python float
    division; ``volatile=True`` -> ``torch.no_grad()``; scheduler stepped
    after the epoch's optimizer steps per current torch.optim semantics.
    """
    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test
    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)

    # ImageNet channel statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset,
                               trainLabels,
                               trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    # Test-time augmentation; multi-crop modes need batch size 1 because
    # each sample already expands into several crops.
    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1

    vidSeqTest = VideoDataset(testDataset,
                              testLabels,
                              testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)
    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = vidSeqTrain.__len__()
    numTestInstances = vidSeqTest.__len__()
    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    # Dir for saving models and log files
    modelFolder = './experiments_' + outDir
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)
    # All parameters are trainable.
    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50  # only checkpoints beating this accuracy are saved
    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # (seq, batch, C, H, W) layout expected by the model.
            inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            # Fix: accumulate as a Python int so the accuracy below uses
            # true float division (integer-tensor division truncated).
            numCorrTrain += (predicted == targets.cuda()).sum().item()
            # Fix: loss.data[0] was removed after PyTorch 0.4.
            epochLoss += loss.item()
        # Fix: step the scheduler after the epoch's optimizer steps.
        optimScheduler.step()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            # Fix: volatile=True is a no-op in modern PyTorch; no_grad()
            # is the supported way to disable autograd for evaluation.
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = Variable(
                            inputs.permute(1, 0, 2, 3, 4).cuda())
                    else:
                        # Multi-crop batch of one sample.
                        inputVariable1 = Variable(inputs[0].cuda())
                    # Fix: async= is a SyntaxError on Python 3.7+.
                    labelVariable = Variable(
                        targets.cuda(non_blocking=True))
                    outputLabel = model(inputVariable1)
                    # Average predictions over the test-time crops.
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted == targets[0]).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            # Keep the best-so-far checkpoint.
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
# --- Option post-processing and model construction (script fragment) ---
# Dataset-dependent channel statistics used by the Normalize transform below.
opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
opt.std = get_std(opt.norm_value)
print(opt)
# Persist the fully-resolved options next to the results for reproducibility.
with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
    json.dump(vars(opt), opt_file)
torch.manual_seed(opt.manual_seed)
model, parameters = generate_model(opt)
# print(model)
criterion = nn.CrossEntropyLoss()
if not opt.no_cuda:
    criterion = criterion.cuda()
# Normalisation policy: no normalisation, mean-only, or mean+std.
if opt.no_mean_norm and not opt.std_norm:
    norm_method = Normalize([0, 0, 0], [1, 1, 1])
elif not opt.std_norm:
    norm_method = Normalize(opt.mean, [1, 1, 1])
else:
    norm_method = Normalize(opt.mean, opt.std)
# Training-time crop strategy (used further down, outside this fragment).
if not opt.no_train:
    assert opt.train_crop in ['random', 'corner', 'center']
    if opt.train_crop == 'random':
        crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'corner':
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size)
    elif opt.train_crop == 'center':
        # Corner-crop machinery restricted to the centre position only.
        crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size,
                                           crop_positions=['c'])
json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model = generate_model(opt) print(model) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda() if not opt.no_train: spatial_transform = Compose([ MultiScaleCornerCrop(opt.scales, opt.sample_size), RandomHorizontalFlip(), ToTensor(opt.norm_value), Normalize(opt.mean, [1, 1, 1]) ]) temporal_transform = TemporalRandomCrop(opt.sample_duration) target_transform = ClassLabel() if opt.dataset == 'kinetics': training_data = Kinetics(opt.video_path, opt.annotation_path, 'training', spatial_transform=spatial_transform, temporal_transform=temporal_transform, target_transform=target_transform) else: training_data = ActivityNet(opt.video_path, opt.annotation_path, 'training', spatial_transform=spatial_transform,
opt.arch = 'resnet-{}'.format(opt.model_depth) opt.mean = get_mean() opt.std = get_std() print(opt, flush=True) with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: json.dump(vars(opt), opt_file) torch.manual_seed(opt.manual_seed) model, parameters, arch_parameters = generate_model(opt) print(model, flush=True) criterion = nn.CrossEntropyLoss() if not opt.no_cuda: criterion = criterion.cuda() norm_method = Normalize(opt.mean, opt.std) if not opt.no_train: assert opt.train_crop in ['random', 'corner', 'center'] if opt.train_crop == 'random': crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'corner': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size) elif opt.train_crop == 'center': crop_method = MultiScaleCornerCrop(opt.scales, opt.sample_size, crop_positions=['c']) spatial_transform = Compose([ crop_method, RandomHorizontalFlip(opt.dataset), ToTensor(), norm_method
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict,
             stackSize, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, alphaX,
             alphaY):
    """Train (stage 1) or fine-tune (stage 2) the attention-ConvLSTM model
    with an auxiliary optical-flow (X/Y) regression task.

    Args:
        dataset: one of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        stage: 1 trains only LSTM cell + classifier; 2 resumes from
            *stage1_dict* and also unfreezes ResNet layer4 convs + fc.
        trainDatasetDir / valDatasetDir: data roots (val may be None).
        stage1_dict: stage-1 state-dict path (stage 2 only).
        stackSize, seqLen: dataset sampling parameters.
        out_dir: experiment output root.
        trainBatchSize / valBatchSize: loader batch sizes.
        numEpochs, lr1, decay_factor, decay_step: Adam + MultiStepLR config.
        memSize: ConvLSTM memory size.
        alphaX, alphaY: weights for the flow regression losses.

    Fixes vs. original: validation loop reused the X flow tensors for the
    Y loss (copy-paste), so the reported Y validation loss was wrong;
    ``targets.cuda(async=True)`` is a SyntaxError on Python 3.7+
    (-> non_blocking=True); unused locals (``normalize``, ``mmap_loss``)
    removed; per-layer unfreeze boilerplate collapsed into a loop with
    identical parameter order.
    """
    # Number of action classes per dataset.
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Dir for saving models and log files
    model_folder = os.path.join('./', out_dir, 'attConvLSTM', str(seqLen),
                                'stage' + str(stage))
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader.  RGB normalisation happens inside makeDataset, so no
    # Normalize step is needed in these pipelines.
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    # 7x7 pipeline for the attention/flow maps.
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform2,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen)
    trainInstances = vid_seq_train.__len__()
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    if valDatasetDir is not None:
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform2,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  sequence=False,
                                  numSeg=1,
                                  stackSize=stackSize,
                                  fmt='.png',
                                  phase='Test',
                                  seqLen=seqLen)
        valInstances = vid_seq_val.__len__()
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        # Stage 1: backbone frozen; only LSTM cell + classifier train.
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        # Stage 2: resume from stage-1 weights, then unfreeze the last
        # ResNet block's convolutions plus the final fc layer.
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        for layer in [model.resNet.layer4[0].conv1,
                      model.resNet.layer4[0].conv2,
                      model.resNet.layer4[1].conv1,
                      model.resNet.layer4[1].conv2,
                      model.resNet.layer4[2].conv1,
                      model.resNet.layer4[2].conv2,
                      model.resNet.fc]:
            for params in layer.parameters():
                params.requires_grad = True
                train_params += [params]
            layer.train(True)
    # The recurrent attention head always trains.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn_regression = nn.MSELoss()  # Loss function for the regression model
    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        x_loss = 0
        y_loss = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            # Re-enable train mode on the unfrozen layers each epoch
            # (model.train(False) during validation reset them).
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for flowX, flowY, inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            flowX = flowX.cuda()
            flowY = flowY.cuda()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _, flowXprediction, flowYprediction = model(
                inputVariable)
            # Flatten predictions and ground truth so the MSE regression
            # is computed element-wise over the whole flow map.
            flowXprediction = flowXprediction.view(-1)
            flowX = torch.reshape(flowX, (-1, )).float()
            flowYprediction = flowYprediction.view(-1)
            flowY = torch.reshape(flowY, (-1, )).float()
            # Self-supervised flow losses, weighted by alpha.
            lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
            lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
            loss = loss_fn(output_label, labelVariable)
            total_loss = loss + lossX + lossY
            total_loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            x_loss += lossX.item()
            y_loss += lossY.item()
            epoch_loss += loss.item()
        optim_scheduler.step()
        avg_x_loss = x_loss / iterPerEpoch
        avg_y_loss = y_loss / iterPerEpoch
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss))
        print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1)
        writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1)
        train_log_loss.write('Training X loss after {} epoch= {}'.format(
            epoch + 1, avg_x_loss))
        train_log_loss.write('Training Y loss after {} epoch= {}'.format(
            epoch + 1, avg_y_loss))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if valDatasetDir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_x_loss = 0
            val_y_loss = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for flowX, flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    flowX = flowX.cuda()
                    flowY = flowY.cuda()
                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    # Fix: async= is a SyntaxError on Python 3.7+.
                    labelVariable = Variable(
                        targets.cuda(non_blocking=True))
                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()
                    # Fix: the original reused flowXprediction/flowX here
                    # (copy-paste), so the Y validation loss was computed
                    # against the X flow data.
                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()
                    lossX = alphaX * loss_fn_regression(
                        flowXprediction, flowX)
                    lossY = alphaY * loss_fn_regression(
                        flowYprediction, flowY)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()
            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep the best-so-far checkpoint.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
class_to_name[i] = class_to_name[i].replace(' ', '-') if args.dataset == 'ucf101': num_class = 101 args.n_classes = 101 img_prefix = 'image_' else: num_class = 174 args.n_classes = 174 img_prefix = '' whole_model, parameters = generate_model(args) print(whole_model) # input('...') if args.no_mean_norm and not args.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not args.std_norm: norm_method = Normalize(args.mean, [1, 1, 1]) else: norm_method = Normalize(args.mean, args.std) spatial_transform = Compose([ Scale(args.sample_size), CenterCrop(args.sample_size), ToTensor(args.norm_value), norm_method ]) # if not args.test_temp_crop == 'sparse': if args.compared_temp_transform == 'shuffle': temp_transform = ShuffleFrames(args.sample_duration) else: temp_transform = ReverseFrames(args.sample_duration)