def __init__(self, dataset='ucf101', split='train', clip_len=16, frame_mode=0, preprocess=False):
    self.clip_len = clip_len
    self.split = split
    self.frame_mode = frame_mode

    # The following three parameters are chosen as described in the paper, section 4.1
    self.resize_height = 128
    self.resize_width = 171
    self.crop_size = 112

    self.root_dir, self.output_dir = PathSet.db_dir(dataset)
    folder = os.path.join(self.output_dir, split)

    if not self.check_integrity():
        raise RuntimeError('Dataset not found or corrupted. '
                           'You need to download it from the official website.')

    if (not self.check_preprocess()) or preprocess:
        print('Preprocessing the {} dataset; this takes long, but it is done only once.'.format(dataset))
        self.preprocess()

    # Collect the filenames of all videos inside every class folder,
    # walking through one class folder at a time.
    self.fnames, labels = [], []
    for label in sorted(os.listdir(folder)):
        for fname in os.listdir(os.path.join(folder, label)):
            self.fnames.append(os.path.join(folder, label, fname))
            labels.append(label)
    assert len(labels) == len(self.fnames)
    print('Number of {} videos: {:d}'.format(split, len(self.fnames)))

    # Prepare a mapping between the label names (strings) and indices (ints)
    self.label2index = {label: index for index, label in enumerate(sorted(set(labels)))}
    # Convert the list of label names into an array of label indices
    self.label_array = np.array([self.label2index[label] for label in labels], dtype=int)

    if dataset == 'ucf101':
        label_file = PathSet.root_dir() + '/ucf101_related/ucf_labels.txt'
    elif dataset == 'hmdb51':
        label_file = PathSet.root_dir() + '/hmdb51_related/hmdb_labels.txt'
    else:
        raise ValueError('unknown dataset ...')

    # Write the "<index> <label>" file once, if it does not exist yet.
    if not os.path.exists(label_file):
        with open(label_file, 'w') as f:
            for idx, label in enumerate(sorted(self.label2index)):
                f.write(str(idx + 1) + ' ' + label + '\n')
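# Example usage (a sketch, not from the repo): the class this __init__ belongs
# to is not named above, so `VideoDataset` below is an assumed name. Wrap it in
# a PyTorch DataLoader to get (clip, label) batches:
#
#   from torch.utils.data import DataLoader
#   train_data = VideoDataset(dataset='ucf101', split='train', clip_len=16)
#   train_loader = DataLoader(train_data, batch_size=8, shuffle=True, num_workers=4)
#   clips, labels = next(iter(train_loader))
#   # clips: (8, 3, 16, 112, 112), assuming __getitem__ crops to crop_size
#   # and transposes to channels-first, as is standard for C3D-style loaders.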
def _get_videoinfo(self):
    ''' Get the class name and video name '''
    with open(PathSet.label_dir(), 'r') as f:
        class_names = f.readlines()
    target_class = class_names[self.cls_id - 1].split(' ')[-1].strip()

    _, output_dir = PathSet.db_dir(database=self.dataset)
    files_list = os.listdir(os.path.join(output_dir, 'test', target_class))

    # Clamp the requested index so it stays within the available videos.
    if len(files_list) < self.video_id:
        self.video_id = len(files_list)
    video_name = files_list[self.video_id - 1]
    return class_names, target_class, video_name
def __load_pretrained_weights(self):
    ''' Initialize the network from a pretrained C3D checkpoint.

    The checkpoint keys use "features.*" / "classifier.*" naming;
    map them onto this model's layer names.
    '''
    corresp_name = {
        # Conv1
        "features.0.weight": "conv1.weight", "features.0.bias": "conv1.bias",
        # Conv2
        "features.3.weight": "conv2.weight", "features.3.bias": "conv2.bias",
        # Conv3a
        "features.6.weight": "conv3a.weight", "features.6.bias": "conv3a.bias",
        # Conv3b
        "features.8.weight": "conv3b.weight", "features.8.bias": "conv3b.bias",
        # Conv4a
        "features.11.weight": "conv4a.weight", "features.11.bias": "conv4a.bias",
        # Conv4b
        "features.13.weight": "conv4b.weight", "features.13.bias": "conv4b.bias",
        # Conv5a
        "features.16.weight": "conv5a.weight", "features.16.bias": "conv5a.bias",
        # Conv5b
        "features.18.weight": "conv5b.weight", "features.18.bias": "conv5b.bias",
        # fc6
        "classifier.0.weight": "fc6.weight", "classifier.0.bias": "fc6.bias",
        # fc7
        "classifier.3.weight": "fc7.weight", "classifier.3.bias": "fc7.bias",
    }

    p_dict = torch.load(PathSet.pretrained_model_dir())
    s_dict = self.state_dict()
    for name in p_dict:
        # Skip checkpoint entries that have no counterpart here
        # (e.g. the final classifier layer).
        if name not in corresp_name:
            continue
        s_dict[corresp_name[name]] = p_dict[name]
    self.load_state_dict(s_dict)
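# A small sanity-check sketch (an assumption, not part of the repo): after
# loading, report which checkpoint keys were transferred and which model
# parameters were left at their initialization. Useful when a checkpoint's
# layout differs from the "features.*"/"classifier.*" scheme above.
def report_weight_transfer(model, checkpoint_path, corresp_name):
    import torch
    p_dict = torch.load(checkpoint_path, map_location='cpu')
    transferred = [n for n in p_dict if n in corresp_name]
    skipped = [n for n in p_dict if n not in corresp_name]
    covered = set(corresp_name[n] for n in transferred)
    untouched = [n for n in model.state_dict() if n not in covered]
    print('transferred: %d keys, skipped: %d keys' % (len(transferred), len(skipped)))
    print('model params not covered by the checkpoint:', untouched)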
def _pre_model(self):
    ''' Prepare the model, loss, and optimizer '''
    if self.model_info is None:
        raise TypeError('NO SPECIFIC MODEL INFO ...')

    model = P3D_zoo(self.model_info).to(self.device)
    criterion = nn.CrossEntropyLoss().to(self.device)
    optimizer = optim.SGD(model.parameters(), lr=self.lr,
                          momentum=0.9, weight_decay=5e-4)

    if self.resume_epoch == 0:
        print("Training {} from scratch...".format(self.model_info['model_name']))
    else:
        resume_file = PathSet.model_dir(model_name=self.model_info['model_name'],
                                        cur_epochs=self.resume_epoch)
        # Load the checkpoint onto the CPU first; the model itself already
        # lives on self.device.
        checkpoint = torch.load(resume_file, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(resume_file.split('/')[-1]))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    model_cache = (model, criterion, optimizer)
    return model_cache
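# For reference, the checkpoints consumed here are the dicts written during
# training (see model_train below): {'epoch': ..., 'state_dict': ...,
# 'opt_dict': ...}. A quick inspection sketch, assuming a valid resume path
# (the epoch value is an example):
#
#   ckpt = torch.load(PathSet.model_dir(model_name='P3D', cur_epochs=50),
#                     map_location='cpu')
#   print(sorted(ckpt.keys()), 'saved at epoch', ckpt['epoch'])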
def _pre_model(self):
    ''' Prepare the model, loss, optimizer, and LR scheduler '''
    if self.model_name == 'C3D':
        model = C3D_model.C3D(num_classes=self.num_classes, pretrained=self.pretrained)
        # Fine-tuning recipe: parameters from get_1x_lr_params (pretrained
        # layers) train at the base LR; those from get_10x_lr_params (newly
        # initialized layers) at 10x the base LR.
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': self.lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': self.lr * 10}]
    else:
        raise TypeError('Unknown model name ...')

    model.to(self.device)
    criterion = nn.CrossEntropyLoss().to(self.device)
    optimizer = optim.SGD(train_params, lr=self.lr, momentum=0.9, weight_decay=5e-4)
    # Decay every parameter group's LR by 10x every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if self.resume_epoch == 0:
        print("Training {} from scratch...".format(self.model_name))
    else:
        resume_file = PathSet.model_dir(model_name=self.model_name,
                                        cur_epochs=self.resume_epoch)
        checkpoint = torch.load(resume_file, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(resume_file.split('/')[-1]))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    model_cache = (model, criterion, optimizer, scheduler)
    return model_cache
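# For reference, a minimal sketch of what the get_1x_lr_params /
# get_10x_lr_params helpers usually look like in C3D fine-tuning code.
# The exact split below (everything vs. a final layer named 'fc8') is an
# assumption; check C3D_model for the authoritative version.
def get_1x_lr_params_sketch(model):
    '''Yield the pretrained parameters (everything but the final classifier).'''
    for name, param in model.named_parameters():
        if not name.startswith('fc8'):
            yield param

def get_10x_lr_params_sketch(model):
    '''Yield the newly initialized final-classifier parameters.'''
    for name, param in model.named_parameters():
        if name.startswith('fc8'):
            yield param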
def model_train(self):
    ''' Train the C3D model according to params_dict '''
    # step - 1: model, loss, optimizer, and LR scheduler
    model, criterion, optimizer, scheduler = self._pre_model()

    # step - 2: data loaders and their sizes
    trainval_loaders, test_dataloader = self._pre_data()
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    log_dir = PathSet.log_dir()
    writer = SummaryWriter(log_dir=log_dir)

    # step - 3: the train/val loop
    for epoch in range(self.resume_epoch, self.num_epochs):
        for phase in ['train', 'val']:
            if phase == 'train':
                print('%s\nepoch_info : (%3d|%3d)\n%s' %
                      ('-' * 100, epoch + 1, self.num_epochs, '-' * 100))
            start_time = timeit.default_timer()
            running_loss, running_corrects = 0.0, 0.0

            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # Move inputs and labels to the device the training is taking place on.
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                # Advance the LR schedule once per training epoch.
                scheduler.step()
                writer.add_scalar('monitor/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('monitor/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('monitor/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('monitor/val_acc_epoch', epoch_acc, epoch)

            stop_time = timeit.default_timer()
            exe_time = stop_time - start_time
            print("[%s]\texe_time:%.2f\tloss:%.4f\tacc:%.4f" %
                  (phase, exe_time, epoch_loss, epoch_acc))

        if (epoch + 1) % self.save_freq == 0:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                PathSet.model_dir(self.model_name, epoch + 1))
            print("Save model at {}\n".format(PathSet.model_dir(self.model_name, epoch + 1)))

        if self.useTest:
            self.model_infer(model, test_dataloader, writer, epoch)

    writer.close()
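# A hypothetical launch sketch (the trainer class wrapping model_train is not
# shown above, so `C3DTrainer` and its constructor arguments are assumed names):
#
#   trainer = C3DTrainer(model_name='C3D', num_classes=101, lr=1e-3,
#                        num_epochs=100, resume_epoch=0, save_freq=10,
#                        useTest=True)
#   trainer.model_train()  # TensorBoard logs go to PathSet.log_dir()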
def gif_generator(self, params_dict):
    ''' Generate a demo GIF.

    step - 1. prepare the target video
    step - 2. load the architecture and epoch_model
    step - 3. start inference
    '''
    # step - 1
    class_list, class_name, video_name = self._get_videoinfo()
    video_path = os.path.join(PathSet.root_dir(), 'dataset/ucf101_related/UCF-101',
                              class_name, video_name + '.avi')
    video = cv2.VideoCapture(video_path)

    # step - 2
    model = P3D_zoo(params_dict).to(self.device)
    model_path = PathSet.model_dir('P3D', self.epoch_id)
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # step - 3
    retaining, clip, text_imglist = True, [], []
    while retaining:
        retaining, frame = video.read()
        if not retaining and frame is None:
            continue
        # Resize, center-crop to 112x112, and subtract the per-channel mean.
        tmp_ = self.center_crop(cv2.resize(frame, (171, 128)), size=(112, 112))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)

        # Run inference on a sliding window of 16 frames.
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            # (N, D, H, W, C) -> (N, C, D, H, W)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs).to(self.device)
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            # Overlay the predicted class and its probability on the frame.
            cv2.putText(frame, class_list[label].split(' ')[-1].strip(), (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
            text_imglist.append(frame)
            clip.pop(0)

            # Optional live preview:
            # cv2.imshow('test video', frame)
            # cv2.waitKey(2)

    gif_path = os.path.join(PathSet.root_dir(), 'model_demo/P3D/',
                            class_name + video_name + '.gif')
    imageio.mimsave(gif_path, text_imglist, fps=12)
    video.release()
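# A hypothetical invocation sketch (the demo class wrapping gif_generator is
# not shown, so `P3DDemo`, its arguments, and the params_dict keys are assumed):
#
#   demo = P3DDemo(dataset='ucf101', cls_id=1, video_id=1, epoch_id=100)
#   demo.gif_generator(params_dict={'model_name': 'P3D63'})
#   # writes <class><video>.gif under model_demo/P3D/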