示例#1
0
    def __init__(self, dataset='ucf101', split='train', clip_len=16, frame_mode=0, preprocess=False):
        ''' Index the clips of one split of a video dataset

        Args:
            dataset: dataset key; only 'ucf101' and 'hmdb51' pass the
                label-file step at the end of the constructor.
            split: name of the sub-folder of the output directory to index.
            clip_len: stored as ``self.clip_len``.
            frame_mode: stored as ``self.frame_mode``.
            preprocess: when true, ``self.preprocess()`` is run even if
                ``self.check_preprocess()`` reports success.

        Raises:
            RuntimeError: if ``self.check_integrity()`` returns a falsy value.
            TypeError: if ``dataset`` is not one of the known keys.
        '''

        self.clip_len = clip_len
        self.split = split
        self.frame_mode = frame_mode
        # The following three parameters are chosen as described in the paper section 4.1
        self.resize_height = 128
        self.resize_width = 171
        self.crop_size = 112

        self.root_dir, self.output_dir = PathSet.db_dir(dataset)
        folder = os.path.join(self.output_dir, split)

        if not self.check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You need to download it from official website.')

        if (not self.check_preprocess()) or preprocess:
            print('Preprocessing of {} dataset, this will take long, but it will be done only once.'.format(dataset))
            self.preprocess()

        # Walk the class folders one at a time (sorted by class name) and
        # record every entry together with the name of its class folder.
        self.fnames, labels = [], []
        for label in sorted(os.listdir(folder)):
            for fname in os.listdir(os.path.join(folder, label)):
                self.fnames.append(os.path.join(folder, label, fname))
                labels.append(label)

        assert len(labels) == len(self.fnames)
        print('Number of {} videos: {:d}'.format(split, len(self.fnames)))

        # Prepare a mapping between the label names (strings) and indices (ints)
        self.label2index = {label: index for index, label in enumerate(sorted(set(labels)))}

        # Convert the list of label names into an array of label indices
        self.label_array = np.array([self.label2index[label] for label in labels], dtype=int)

        # Location of the "<1-based index> <class name>" label file, relative
        # to the project root, for every supported dataset.
        label_files = {
            'ucf101': '/ucf101_related/ucf_labels.txt',
            'hmdb51': '/hmdb51_related/hmdb_labels.txt',
        }
        if dataset not in label_files:
            raise TypeError('unknown dataset ...')

        label_file = PathSet.root_dir() + label_files[dataset]
        # The label file is written once and never overwritten afterwards.
        if not os.path.exists(label_file):
            with open(label_file, 'w') as f:
                f.writelines(
                    '{} {}\n'.format(index + 1, label)
                    for index, label in enumerate(sorted(self.label2index)))
示例#2
0
    def _get_videoinfo(self):
        ''' Get the class list, the target class name and the target video name

        Reads ``self.cls_id`` and ``self.video_id`` (both 1-based) and
        ``self.dataset``. ``self.video_id`` is clamped in place to the number
        of entries found in the class folder when it exceeds that number.

        Returns:
            (class_names, target_class, video_name): the raw lines of the
            label file, the class name taken from the last space-separated
            field of line ``self.cls_id``, and the selected entry of the
            class folder inside the 'test' split.
        '''

        with open(PathSet.label_dir(), 'r') as f:
            class_names = f.readlines()

        target_class = class_names[self.cls_id - 1].split(' ')[-1].strip()

        _, output_dir = PathSet.db_dir(database=self.dataset)
        # os.listdir gives no ordering guarantee; sort so that a given
        # video_id always selects the same entry on every machine.
        files_list = sorted(
            os.listdir(os.path.join(output_dir, 'test', target_class)))

        if len(files_list) < self.video_id:
            self.video_id = len(files_list)

        video_name = files_list[self.video_id - 1]

        return class_names, target_class, video_name
示例#3
0
    def __load_pretrained_weights(self):
        ''' Initialize the network from the pretrained checkpoint

        Entries of the checkpoint whose name appears in the table below are
        copied into this module's state dict under the corresponding local
        name; every other checkpoint entry is ignored.
        '''

        # (checkpoint layer prefix, local layer prefix)
        layer_pairs = (
            ('features.0', 'conv1'),
            ('features.3', 'conv2'),
            ('features.6', 'conv3a'),
            ('features.8', 'conv3b'),
            ('features.11', 'conv4a'),
            ('features.13', 'conv4b'),
            ('features.16', 'conv5a'),
            ('features.18', 'conv5b'),
            ('classifier.0', 'fc6'),
            ('classifier.3', 'fc7'),
        )
        corresp_name = {
            src + suffix: dst + suffix
            for src, dst in layer_pairs
            for suffix in ('.weight', '.bias')
        }

        # Deserialize onto the CPU so a checkpoint saved on a GPU can also be
        # read on a machine without one; load_state_dict copies the values
        # into the existing parameters afterwards.
        p_dict = torch.load(PathSet.pretrained_model_dir(),
                            map_location=lambda storage, loc: storage)
        s_dict = self.state_dict()
        for name, tensor in p_dict.items():
            if name in corresp_name:
                s_dict[corresp_name[name]] = tensor
        self.load_state_dict(s_dict)
示例#4
0
    def _pre_model(self):
        ''' Build the network, the loss and the optimizer

        Returns:
            (model, criterion, optimizer). When ``self.resume_epoch`` is not
            0 the model and optimizer states are restored from the checkpoint
            of that epoch.

        Raises:
            TypeError: if ``self.model_info`` is None.
        '''

        if self.model_info is None:
            raise TypeError('NO SPECIFIC MODEL INFO ...')

        net = P3D_zoo(self.model_info).to(self.device)
        loss_fn = nn.CrossEntropyLoss().to(self.device)
        sgd = optim.SGD(net.parameters(), lr=self.lr, momentum=0.9,
                        weight_decay=5e-4)

        if self.resume_epoch != 0:
            ckpt_path = PathSet.model_dir(
                model_name=self.model_info['model_name'],
                cur_epochs=self.resume_epoch)

            # Always deserialize onto the CPU, whatever device saved the file.
            ckpt = torch.load(ckpt_path,
                              map_location=lambda storage, loc: storage)

            ckpt_name = ckpt_path.split('/')[-1]
            print("Initializing weights from: {}...".format(ckpt_name))
            net.load_state_dict(ckpt['state_dict'])
            sgd.load_state_dict(ckpt['opt_dict'])
        else:
            print("Training {} from scratch...".format(
                self.model_info['model_name']))

        param_count = sum(p.numel() for p in net.parameters())
        print('Total params: %.2fM' % (param_count / 1000000.0))

        return (net, loss_fn, sgd)
示例#5
0
    def _pre_model(self):
        ''' Build the network, the loss, the optimizer and the LR scheduler

        Returns:
            (model, criterion, optimizer, scheduler). When
            ``self.resume_epoch`` is not 0 the model and optimizer states are
            restored from the checkpoint of that epoch.

        Raises:
            TypeError: if ``self.model_name`` is not 'C3D'.
        '''

        if self.model_name != 'C3D':
            raise TypeError('Unknown model name ...')

        net = C3D_model.C3D(num_classes=self.num_classes,
                            pretrained=self.pretrained)
        # Two parameter groups: one at the base rate, one at ten times it.
        param_groups = [
            {'params': C3D_model.get_1x_lr_params(net), 'lr': self.lr},
            {'params': C3D_model.get_10x_lr_params(net), 'lr': self.lr * 10},
        ]

        net.to(self.device)
        loss_fn = nn.CrossEntropyLoss().to(self.device)

        sgd = optim.SGD(param_groups, lr=self.lr, momentum=0.9,
                        weight_decay=5e-4)
        lr_sched = optim.lr_scheduler.StepLR(sgd, step_size=10, gamma=0.1)

        if self.resume_epoch != 0:
            ckpt_path = PathSet.model_dir(model_name=self.model_name,
                                          cur_epochs=self.resume_epoch)

            # Always deserialize onto the CPU, whatever device saved the file.
            ckpt = torch.load(ckpt_path,
                              map_location=lambda storage, loc: storage)

            ckpt_name = ckpt_path.split('/')[-1]
            print("Initializing weights from: {}...".format(ckpt_name))
            net.load_state_dict(ckpt['state_dict'])
            sgd.load_state_dict(ckpt['opt_dict'])
        else:
            print("Training {} from scratch...".format(self.model_name))

        param_count = sum(p.numel() for p in net.parameters())
        print('Total params: %.2fM' % (param_count / 1000000.0))

        return (net, loss_fn, sgd, lr_sched)
示例#6
0
    def model_train(self):
        ''' Train the model and log per-epoch metrics

        step - 1. build the model, loss and optimizer via ``self._pre_model()``
        step - 2. build the data loaders via ``self._pre_data()``
        step - 3. for every epoch from ``self.resume_epoch`` up to
                  ``self.num_epochs`` run a 'train' and a 'val' pass, write
                  loss / accuracy to the summary writer, save a checkpoint
                  every ``self.save_freq`` epochs and, when ``self.useTest``
                  is set, call ``self.model_infer`` on the test loader.
        '''

        # step - 1
        model, criterion, optimizer = self._pre_model()

        # step - 2
        trainval_loaders, test_dataloader = self._pre_data()
        trainval_sizes = {
            x: len(trainval_loaders[x].dataset)
            for x in ['train', 'val']
        }
        writer = SummaryWriter(log_dir=PathSet.log_dir())

        # step - 3
        try:
            for epoch in range(self.resume_epoch, self.num_epochs):

                for phase in ['train', 'val']:
                    is_train = phase == 'train'

                    if is_train:
                        print('%s\nepoch_info : (%3d|%3d)\n%s' %
                              ('-' * 100, epoch + 1, self.num_epochs,
                               '-' * 100))

                    start_time = timeit.default_timer()
                    # Plain Python accumulators, so the epoch statistics do
                    # not depend on at least one batch having been seen.
                    running_loss, running_corrects = 0.0, 0

                    if is_train:
                        model.train()
                    else:
                        model.eval()

                    for inputs, labels in tqdm(trainval_loaders[phase]):

                        # move inputs and labels to the device the training
                        # is taking place on
                        inputs = inputs.to(self.device)
                        labels = labels.to(self.device)

                        if is_train:
                            optimizer.zero_grad()

                        # Autograd bookkeeping is only needed when the
                        # backward pass will actually run.
                        with torch.set_grad_enabled(is_train):
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)

                        # Softmax is monotonic, so the arg-max of the raw
                        # scores is the predicted class.
                        preds = torch.max(outputs, 1)[1]

                        if is_train:
                            loss.backward()
                            optimizer.step()

                        running_loss += loss.item() * inputs.size(0)
                        running_corrects += torch.sum(preds == labels).item()

                    epoch_loss = running_loss / trainval_sizes[phase]
                    epoch_acc = float(running_corrects) / trainval_sizes[phase]

                    if is_train:
                        writer.add_scalar('monitor/train_loss_epoch',
                                          epoch_loss, epoch)
                        writer.add_scalar('monitor/train_acc_epoch',
                                          epoch_acc, epoch)
                    else:
                        writer.add_scalar('monitor/val_loss_epoch',
                                          epoch_loss, epoch)
                        writer.add_scalar('monitor/val_acc_epoch',
                                          epoch_acc, epoch)

                    stop_time = timeit.default_timer()
                    exe_time = stop_time - start_time
                    print("[%s]\texe_time:%.2f\tloss:%.4f\tacc:%.4f" %
                          (phase, exe_time, epoch_loss, epoch_acc))

                if (epoch + 1) % self.save_freq == 0:
                    # NOTE(review): the checkpoint path is built from
                    # self.model_name - confirm it matches the name used
                    # when resuming.
                    save_path = PathSet.model_dir(self.model_name, epoch + 1)
                    torch.save(
                        {
                            'epoch': epoch + 1,
                            'state_dict': model.state_dict(),
                            'opt_dict': optimizer.state_dict(),
                        }, save_path)
                    print("Save model at {}\n".format(save_path))

                if self.useTest:
                    self.model_infer(model, test_dataloader, writer, epoch)
        finally:
            # Close the event file even when training is interrupted.
            writer.close()
示例#7
0
    def gif_generator(self, params_dict):
        '''
        Generate a demo for show

        step - 1. prepare the target video
        step - 2. load the architecture and epoch_model
        step - 3. start inference

        Every frame is resized, center-cropped and mean-subtracted, then
        pushed into a sliding window of 16 frames. Once the window is full,
        each new frame is classified, annotated with the predicted class and
        its probability, and collected for the output GIF.

        Args:
            params_dict: forwarded unchanged to ``P3D_zoo``.
        '''

        # step - 1
        class_list, class_name, video_name = self._get_videoinfo()
        video_path = os.path.join(PathSet.root_dir(),
                                  'dataset/ucf101_related/UCF-101', class_name,
                                  video_name + '.avi')

        # step - 2
        model = P3D_zoo(params_dict).to(self.device)
        model_path = PathSet.model_dir('P3D', self.epoch_id)
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # step - 3
        mean = np.array([[[90.0, 98.0, 102.0]]])  # per-channel offset
        clip, text_imglist = [], []
        video = cv2.VideoCapture(video_path)
        try:
            while True:

                retaining, frame = video.read()
                if not retaining or frame is None:
                    # end of stream (or unreadable frame): stop decoding
                    break

                cropped = self.center_crop(cv2.resize(frame, (171, 128)),
                                           size=(112, 112))
                clip.append(cropped - mean)  # normalize

                if len(clip) == 16:
                    inputs = np.array(clip).astype(np.float32)
                    inputs = np.expand_dims(inputs, axis=0)
                    # move the channel axis in front of the frame axis
                    inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                    inputs = torch.from_numpy(inputs).to(self.device)
                    with torch.no_grad():
                        outputs = model(inputs)
                        probs = torch.nn.Softmax(dim=1)(outputs)

                    label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

                    cv2.putText(frame,
                                class_list[label].split(' ')[-1].strip(),
                                (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (0, 0, 255), 1)
                    cv2.putText(frame,
                                "prob: %.4f" % probs[0][label].item(),
                                (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (0, 255, 0), 1)
                    # OpenCV frames are BGR while imageio expects RGB;
                    # convert so the GIF keeps the original colours.
                    text_imglist.append(
                        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    clip.pop(0)
        finally:
            # Free the capture handle even if inference fails midway.
            video.release()

        gif_path = os.path.join(PathSet.root_dir(), 'model_demo/P3D/',
                                class_name + video_name + '.gif')
        imageio.mimsave(gif_path, text_imglist, fps=12)