def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/aps_labels.txt', 'r') as f:
        class_names = f.readlines()
    model = C3D_model.C3D(num_classes=24)
    # `weight` (checkpoint path) and `video` (input path) are module-level
    # variables defined outside this snippet
    checkpoint = torch.load(weight, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()

    cap = cv2.VideoCapture(video)
    retaining = True
    wait = False
    
    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.to(device)  # Variable is deprecated; plain tensors work
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]
            
            prob = probs[0][label]
            name = class_names[label].split(' ')[-1].strip()
            
            cv2.putText(frame, name, (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % probs, (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            clip.pop(0)
            
            # simple hysteresis: announce a label once its probability exceeds
            # 0.9, and re-arm only after it drops back below 0.4
            if wait and prob < 0.4:
                wait = False

            if not wait and prob > 0.9:
                print(name)
                wait = True

        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
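
# Every snippet here calls a `center_crop` helper that is not shown. A
# minimal sketch (an assumption, not the authors' exact code): take the
# central 112x112 patch of a frame resized to 171x128, matching the C3D
# input size. Some snippets pass an explicit crop size as a second argument.
def center_crop(frame, size=(112, 112)):
    h, w = frame.shape[:2]
    ch, cw = size if isinstance(size, tuple) else (size, size)
    top = (h - ch) // 2
    left = (w - cw) // 2
    return frame[top:top + ch, left:left + cw, :]
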
Example 2
def setup(ckpt_path: str) -> Tuple[nn.Module, torch.device]:
    '''Set up the model and its GPU/CPU environment.

    Args:
        ckpt_path(str): path to the model checkpoint
    Returns:
        model(nn.Module): the configured model
        device(torch.device): the device the model runs on
    '''
    model = C3D_model.C3D(num_classes=2)
    use_cuda = torch.cuda.is_available()
    device_name = 'cuda:0' if use_cuda else 'cpu'
    device = torch.device(device_name)

    params = {} if use_cuda else {'map_location': 'cpu'}

    # restore from the checkpoint
    if ckpt_path is not None:
        assert os.path.exists(ckpt_path), 'Invalid path {}'.format(ckpt_path)
        ckpt = torch.load(ckpt_path, **params)
        model.load_state_dict(ckpt['state_dict'])
    model.to(device)
    model.eval()

    return model, device
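
# Hypothetical usage of setup(); the checkpoint path is a placeholder,
# not a file referenced by the original code:
#     model, device = setup('path/to/C3D_checkpoint.pth.tar')
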
def predict_c3d(video):
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # print("Device being used:", device)

    with open('./dataloaders/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()
    # init model
    model = C3D_model.C3D(num_classes=101)
    checkpoint = torch.load('run/run_3/models/C3D-ucf101_epoch-99.pth',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    # model.to(device)
    model.eval()

    # read video
    # video = '/Path/to/UCF-101/ApplyLipstick/v_ApplyLipstick_g04_c02.avi'
    cap = cv2.VideoCapture(video)
    retaining = True

    clip = []
    pred_label, prob = None, None  # fallbacks if the video has fewer than 16 frames
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            # this variant runs on the CPU, so the tensor is not moved to a device
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]
            prob = "%.4f" % probs[0][label]
            Label = class_names[label].split(' ')[-1].strip()

        #     cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 20),
        #                  cv2.FONT_HERSHEY_SIMPLEX, 0.6,
        #                 (0, 0, 255), 1)
        #     cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 40),
        #                 cv2.FONT_HERSHEY_SIMPLEX, 0.6,
        #                 (0, 0, 255), 1)
        #     clip.pop(0)
        #
        # cv2.imshow('result', frame)
        # cv2.waitKey(30)
    #
    # cap.release()
    # cv2.destroyAllWindows()

    return pred_label, prob
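
# Hypothetical usage of predict_c3d(), reusing the placeholder path from the
# snippet's own comment:
#     label, prob = predict_c3d('/Path/to/UCF-101/ApplyLipstick/v_ApplyLipstick_g04_c02.avi')
#     print(label, prob)
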
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()
    # init model
    model = C3D_model.C3D(num_classes=101)
    checkpoint = torch.load('run/run_10/models/C3D-ucf101_epoch-99.pth.tar', map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()  # inference mode: freeze BatchNorm stats and disable Dropout

    # read video
    video = '/data/Sadjad/Datasets/ucf101/v_WritingOnBoard_g03_c03.avi'
    # '/data/Sadjad/Datasets/DALY/download_videos/videos/3\ WAYS\ OF\ APPLYING\ RED\ LIPSTICK\ l\ Pearltji-YCqSlzeFvn4.mp4'
    # '/data/Sadjad/Datasets/ucf101/UCF-101/StillRings/v_StillRings_g04_c02.avi'
    cap = cv2.VideoCapture(0)  # webcam; pass `video` instead to read the file above
    retaining = True

    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.to(device)
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2.0,
                        (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            clip.pop(0)

        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
Example 5
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()
    # init model
    model = C3D_model.C3D(num_classes=2)
    checkpoint = torch.load(
        '/home/zhouzhilong/pytorch-video-recognition/run/run_5/models/C3D-ucf101_epoch-199.pth.tar',
        map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()
    # read video
    video = '/hhd12306/zhouzhilong/UCF-101/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi'
    cap = cv2.VideoCapture(video)
    retaining = True
    clip = []
    print(cap.isOpened())
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.to(device)
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]
            cv2.putText(frame, class_names[label].split(' ')[-1].strip(),
                        (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255),
                        1)
            cv2.putText(frame, "prob: %.4f" % probs[0][label], (40, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
            clip.pop(0)
            print(class_names[label].split(' ')[-1].strip())
        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
Example 6
    def _pre_model(self):
        ''' Prepare the model '''

        model, train_params = None, None

        if self.model_name == 'C3D':
            model = C3D_model.C3D(num_classes=self.num_classes, pretrained=self.pretrained)
            train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': self.lr},
                            {'params': C3D_model.get_10x_lr_params(model), 'lr': self.lr * 10}]
        else:
            raise TypeError('Unknown model name ...')

        model.to(self.device)
        criterion = nn.CrossEntropyLoss().to(self.device)

        optimizer = optim.SGD(train_params, lr=self.lr, momentum=0.9, weight_decay=5e-4)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

        if self.resume_epoch == 0:
            print("Training {} from scratch...".format(self.model_name))
        else:
            resume_file = PathSet.model_dir(model_name=self.model_name, cur_epochs=self.resume_epoch)

            checkpoint = torch.load(resume_file, map_location=lambda storage, loc: storage)

            print("Initializing weights from: {}...".format(resume_file.split('/')[-1]))
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['opt_dict'])

        print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

        #model.to(self.device); criterion.to(self.device)

        model_cache = (model, criterion, optimizer, scheduler)

        return model_cache
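
# Hypothetical usage of _pre_model inside its owning trainer class (an
# assumption, since the class itself is not shown):
#     model, criterion, optimizer, scheduler = self._pre_model()
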
def test_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, num_epochs=nEpochs):
    torch.cuda.empty_cache()
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    # load the most recent checkpoint saved in save_dir
    latest_ckpt = sorted(os.listdir(save_dir))[-1]
    model.load_state_dict(torch.load(os.path.join(save_dir, latest_ckpt)))
    params = model.parameters()

    criterion = nn.CrossEntropyLoss()
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=batch_size, num_workers=num_workers)
    test_size = len(test_dataloader.dataset)

    model.to(device)
    criterion.to(device)
    #print(model)

    model.eval()
    start_time = timeit.default_timer()

    running_loss = 0.0
    running_corrects = 0.0

    for inputs, labels in tqdm(test_dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = model(inputs)
        probs = nn.Softmax(dim=1)(outputs)
        preds = torch.max(probs, 1)[1]
        loss = criterion(outputs, labels)

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    loss = running_loss / test_size
    acc = running_corrects.double() / test_size

    print("[test] Loss: {} Acc: {}".format(loss, acc)
    stop_time = timeit.default_timer()
    print("Execution time: " + str(stop_time - start_time) + "\n")



if __name__ == "__main__":
    test_model()
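
# `VideoDataset` is assumed throughout these snippets but never shown. Its
# expected interface (an assumption inferred from how it is used): a
# torch.utils.data.Dataset constructed with dataset/split/clip_len (and, in
# some snippets, preprocess/grayscale) whose __getitem__ returns a
# (clip, label) pair, where clip is a float tensor shaped
# (3, clip_len, crop_h, crop_w) and label is an integer class index.
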
def main():
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('actions_labels.txt', 'r') as f:
        class_names = f.readlines()
    #print(class_names)
    # init model
    model = C3D_model.C3D(num_classes=48)
    model = nn.DataParallel(model)
    checkpoint = torch.load('C3D39.pth.tar',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()

    # read video
    video = 'P08_tea.avi'
    cap = cv2.VideoCapture(video)
    retaining = True

    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.to(device)
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)

            top5probs, labels = torch.topk(probs, 5)

            # overlay the top-5 class names and probabilities, one per row
            for i in range(5):
                y = 20 * (i + 1)
                cv2.putText(frame,
                            class_names[labels[0][i]].split(' ')[-1].strip(),
                            (20, y), cv2.FONT_HERSHEY_SIMPLEX, 0.4,
                            (255, 255, 5), 1, cv2.LINE_AA)
                cv2.putText(frame, "prob: %.4f" % top5probs[0][i], (110, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, (30, 30, 255), 1,
                            cv2.LINE_AA)
            clip.pop(0)

        cv2.imshow('result', frame)
        cv2.waitKey(30)

    cap.release()
    cv2.destroyAllWindows()
Example 9
def start():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    nEpochs = 101  # number of training epochs
    resume_epoch = 0  # 0 trains from scratch; e.g. 99 resumes from the model saved after epoch 99
    useTest = True  # whether to also evaluate on the test split during training
    nTestInterval = 20  # run a test pass every nTestInterval epochs
    snapshot = 25  # save a checkpoint every `snapshot` epochs
    lr = 1e-5  # learning rate

    dataset = "ucf101"
    num_classes = 101

    # get the directory containing this file
    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    # experiment name, e.g. 'videoprocess' (the name of that directory)
    exp_name = os.path.dirname(os.path.abspath(__file__)).split('/')[-1]

    # if resuming from a previous model, reuse the id of the last run;
    # if starting from scratch, use the last run id plus one
    # (glob.glob() returns every path matching the joined pattern)
    if resume_epoch != 0:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    save_dir = os.path.join(save_dir_root, 'run', 'run_' + str(run_id))
    modelName = 'C3D'
    saveName = modelName + '-' + dataset

    # build the model
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                    {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
            map_location=lambda storage, loc: storage)  # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    # print the total number of parameters
    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16), batch_size=6, shuffle=True,
                                  num_workers=0)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=16), batch_size=6, num_workers=0)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=6, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, nEpochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()
            # tqdm renders a progress bar over the dataloader
            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device training runs on
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % snapshot == (snapshot - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % nTestInterval == (nTestInterval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
Example 10
    def gif_generator(self):
        '''
        Generate a demo gif for display

        step - 1. prepare the target video
        step - 2. load the architecture and the epoch model
        step - 3. run inference
        '''

        # step - 1
        class_list, class_name, video_name = self._get_videoinfo()
        video_path = os.path.join(PathSet.root_dir(),
                                  'dataset/ucf101_related/UCF-101', class_name,
                                  video_name + '.avi')
        video = cv2.VideoCapture(video_path)

        # step - 2
        model = C3D_model.C3D(num_classes=101).to(self.device)
        model_path = PathSet.model_dir('C3D', self.epoch_id)
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # step - 3
        retaining, clip, text_imglist = True, [], []
        while retaining:

            retaining, frame = video.read()
            if not retaining and frame is None:
                continue

            tmp_ = self.center_crop(cv2.resize(frame, (171, 128)),
                                    size=(112, 112))
            tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])  # subtract the per-channel mean
            clip.append(tmp)

            if len(clip) == 16:
                inputs = np.array(clip).astype(np.float32)
                inputs = np.expand_dims(inputs, axis=0)
                inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                inputs = torch.from_numpy(inputs)
                inputs = inputs.to(self.device)
                with torch.no_grad():
                    outputs = model(inputs)

                probs = torch.nn.Softmax(dim=1)(outputs)
                label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

                cv2.putText(frame, class_list[label].split(' ')[-1].strip(),
                            (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                            (0, 0, 255), 1)
                cv2.putText(frame, "prob: %.4f" % probs[0][label], (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
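                # note: OpenCV frames are BGR while imageio expects RGB, so
                # the saved gif's colors will be swapped unless converted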
                text_imglist.append(frame)
                clip.pop(0)

            # cv2.imshow('test video', frame)
            # cv2.waitKey(2)

        gif_path = os.path.join(PathSet.root_dir(), 'model_demo/C3D/',
                                class_name + video_name + '.gif')
        imageio.mimsave(gif_path, text_imglist, fps=12)
        video.release()
Example 11
def main():
    print("CUDA available:", torch.cuda.is_available())

    with open("./dataloaders/ucf_labels.txt", 'r') as f:
        class_names = f.readlines()

    # init model
    model = C3D_model.C3D(num_classes=101)
    # load the checkpoint
    checkpoint = torch.load(
        './run/run_0/models/C3D-ucf101_epoch-99.pth.tar',
        map_location=lambda storage, loc: storage)

    model.load_state_dict(checkpoint['state_dict'])

    model.cuda()
    model.eval()

    video = './CliffDiving.mp4'
    cap = cv2.VideoCapture(video)
    retaining = True
    # XVID is an MPEG-4 codec; the output file uses the .avi container
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('Output_CliffDiving.avi', fourcc, 20.0, (320, 240))

    clip = []

    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue

        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.cuda()

            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (10, 205),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (138, 43, 226), 2)
            cv2.putText(frame, "Prob: %.4f" % probs[0][label], (10, 230),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        (138, 43, 226), 2)
            clip.pop(0)

        if retaining:
            # write the annotated frame; it must match the (320, 240) size
            # passed to VideoWriter or OpenCV will silently drop it
            out.write(frame)

            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    # release resources once the whole video has been processed
    cap.release()
    out.release()
    cv2.destroyAllWindows()
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot):
    torch.cuda.empty_cache()
    # model = C3D_model.C3D_Dilation(num_classes=num_classes, pretrained=False)
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)

    train_params = model.parameters()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    print("Training {} from scratch...".format(modelName))
    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)
    #print(model)

    log_dir = os.path.join(
        save_dir, 'models',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16),
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16),
                                batch_size=batch_size,
                                num_workers=num_workers)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {
        x: len(trainval_loaders[x].dataset)
        for x in ['train', 'val']
    }

    for epoch in range(num_epochs):

        for phase in ['train', 'val']:

            confusion_matrix = torch.zeros(num_classes, num_classes)

            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            if phase == 'train':
                model.train()
                #scheduler.step()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device=device, dtype=torch.int64)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                for t, p in zip(labels.view(-1), preds.view(-1)):
                    confusion_matrix[t.long(), p.long()] += 1

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

            # per-class accuracy (recall) from the confusion matrix
            print(confusion_matrix.diag() / confusion_matrix.sum(1))

        if epoch % save_epoch == (save_epoch - 1):
            PATH = os.path.join(
                save_dir, saveName + '_epoch-' + str(epoch + 1) + '.pth.tar')
            torch.save(model.state_dict(), PATH)
            print("Save model at {}\n".format(PATH))

    writer.close()
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/ucf_labels.txt', 'r') as f:
        class_names = f.readlines()
    # init model
    model = C3D_model.C3D(num_classes=101)
    checkpoint = torch.load('run/run_10/models/C3D-ucf101_epoch-99.pth.tar', map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()  # inference mode: freeze BatchNorm stats and disable Dropout

    # read video
    # video = '/data/Sadjad/Datasets/ucf101/UCF-101/HandstandWalking/v_HandstandWalking_g22_c04.avi' # Haircut/v_Haircut_g03_c01.avi' # FrontCrawl/v_FrontCrawl_g07_c05.avi' # HulaHoop/v_HulaHoop_g02_c04.avi' # Biking/v_Biking_g06_c05.avi' # GolfSwing/v_GolfSwing_g11_c01.avi'
    # hmdb51/videos/dive/Extreme_Cliffdiving_dive_f_cm_np1_le_bad_2.avi'
    # ucf101/UCF-101/HighJump/v_HighJump_g07_c03.avi' # Skiing/v_Skiing_g09_c05.avi' # Archery/v_Archery_g01_c03.avi'
    #  '/data/Sadjad/Datasets/ucf101/UCF-101/Knitting/v_Knitting_g03_c05.avi'
    # '/data/Sadjad/Datasets/DALY/download_videos/videos/3\ WAYS\ OF\ APPLYING\ RED\ LIPSTICK\ l\ Pearltji-YCqSlzeFvn4.mp4'
    # '/data/Sadjad/Datasets/ucf101/UCF-101/StillRings/v_StillRings_g04_c02.avi'
    # capwrite = cv2.VideoCapture()
    cap = cv2.VideoCapture(0)  # webcam; pass `video` instead to read a file
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX') # MJPG')
    # size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #         int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # out = cv2.VideoWriter('6.mp4',fourcc, 25, size)
    # out = cv2.VideoWriter('output.avi',fourcc, 20.0, (640,480))
    
    retaining = True

    clip = []
    while retaining:
        retaining, frame = cap.read()
        if not retaining and frame is None:
            continue
        tmp_ = center_crop(cv2.resize(frame, (171, 128)))
        tmp = tmp_ - np.array([[[90.0, 98.0, 102.0]]])
        clip.append(tmp)
        if len(clip) == 16:
            inputs = np.array(clip).astype(np.float32)
            inputs = np.expand_dims(inputs, axis=0)
            inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
            inputs = torch.from_numpy(inputs)
            inputs = inputs.to(device)
            with torch.no_grad():
                outputs = model(inputs)

            probs = torch.nn.Softmax(dim=1)(outputs)
            label = torch.max(probs, 1)[1].detach().cpu().numpy()[0]

            cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 210),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                        (255, 0, 0), 1)
            cv2.putText(frame, "acc: %.4f" % probs[0][label], (20, 230),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (255, 0, 0), 1)
            clip.pop(0)

        # out.write(frame)
        cv2.imshow('result', frame)
        cv2.waitKey(30)  # imshow needs a waitKey call to refresh the window

    # out.release()
    cap.release()
    cv2.destroyAllWindows()
Example 14
def train_model(dataset=dataset, save_dir=SAVE_FILE_FOLDER, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'C3D_td5':
        model = C3D_model.C3D_td5(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = model.parameters()
    else:
        print('Unknown model name: {}'.format(modelName))
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    if _optimizer == "SGD":
        optimizer = optim.SGD(train_params, lr=lr, momentum=MOMENTUM, weight_decay=WD)
    elif _optimizer == "Adam":
        optimizer = optim.Adam(train_params, lr=lr, weight_decay=WD)
    # print(optimizer)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=SCHEDULER_STEP_SIZE,
                                          gamma=SCHEDULER_GAMMA)  # the scheduler divides the lr by 10 every 10 epochs

    model.to(device)
    criterion.to(device)

    # if resume_epoch == 0:
    if resume_model_path is None:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(
            resume_model_path,
            map_location=lambda storage, loc: storage)  # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(resume_model_path))
        model.load_state_dict(checkpoint['state_dict'])
        if RESUM_OPTIMIZER:
            optimizer.load_state_dict(checkpoint['opt_dict'])
        # resume_epoch
    # else:
    #     checkpoint = torch.load(os.path.join(SAVE_FILE_FOLDER, 'models', EXP_NAME + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
    #                             map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
    #     print("Initializing weights from: {}...".format(
    #         os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
    #     model.load_state_dict(checkpoint['state_dict'])
    #     optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    writer = SummaryWriter(logdir=LOG_PATH)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=clip_len, preprocess=IF_PREPROCESS_TRAIN, grayscale=grayscale), batch_size=BS, shuffle=True, num_workers=N_WORKERS)
    val_dataloader   = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=clip_len, preprocess=IF_PREPROCESS_VAL, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=clip_len, preprocess=IF_PREPROCESS_TEST, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    cudnn.benchmark = True

    global_best_val_acc = 0

    for epoch in range(num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0
            # running_roc = 0.0

            list_pred = list()
            list_label = list()

            # print(optimizer)

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            # for inputs, labels in tqdm(trainval_loaders[phase]):
            run_count = 0
            for inputs, labels in trainval_loaders[phase]:
                # move inputs and labels to the device the training is taking place on
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # try:
                #     running_roc += roc_auc_score(labels.data.cpu(), preds.cpu())
                # except:
                #     y_true = labels.data.cpu().tolist()
                #     y_true_2 = y_true.copy()
                #     for i_cls in range(N_CLASSES):
                #         y_true_2.append(i_cls)
                #
                #     y_pred = preds.cpu().tolist()
                #     y_pred_2 = y_pred.copy()
                #     for i_cls in range(N_CLASSES):
                #         y_pred_2.append(i_cls)
                #
                #     running_roc += roc_auc_score(y_true_2, y_pred_2)
                #
                # run_count += 1
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/train_roc_epoch', epoch_roc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/val_roc_epoch', epoch_roc, epoch)
                # if epoch_acc >= global_best_val_acc:
                #     torch.save({
                #         'epoch': epoch + 1,
                #         'state_dict': model.state_dict(),
                #         'opt_dict': optimizer.state_dict(),
                #     }, os.path.join(SAVE_FILE_FOLDER, 'models', EXP_NAME + '_epoch-' + str(epoch) + 'ValAcc_{:10.4f}_'.format(epoch_loss) + '.pth.tar'))
                #     print("Save model at {}\n".format(
                #         os.path.join(SAVE_FILE_FOLDER, 'models', EXP_NAME + '_epoch-' + str(epoch) + 'ValAcc_{:10.4f}_'.format(epoch_loss) + '.pth.tar')))

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}, ROC:{}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))
            stop_time = timeit.default_timer()
            # print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0
            # running_roc = 0.0
            list_pred = list()
            list_label = list()

            # for inputs, labels in tqdm(test_dataloader):
            run_count = 0
            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # try:
                #     running_roc += roc_auc_score(labels.data.cpu(), preds.cpu())
                # except:
                #     running_roc += 0.5
                # run_count += 1
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            writer.add_scalar('data/test_roc_epoch', epoch_roc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc:{} ROC: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))
            stop_time = timeit.default_timer()
            # print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
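
# `multiclass_roc_score` is used above but not defined in this snippet. A
# plausible sketch (an assumption about its behavior): macro-averaged
# one-vs-rest ROC AUC computed from hard predictions with scikit-learn.
# It will raise if some class never occurs in `label`.
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

def multiclass_roc_score(label, pred, n_cls):
    classes = list(range(n_cls))
    y_true = label_binarize(label, classes=classes)  # one-hot ground truth
    y_pred = label_binarize(pred, classes=classes)   # one-hot predictions
    return roc_auc_score(y_true, y_pred, average='macro')
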
Example 15
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot,
                useTest=useTest,
                test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{
            'params': C3D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': C3D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]
    elif modelName == 'R2Plus1D':
        # model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
        #                 {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
        model = models.video.r2plus1d_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()
        # for name, param in model.named_parameters():
        #     print(name, param.data)
        #     jdks
    elif modelName == 'R3D':
        # model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = model.parameters()
        model = models.video.r3d_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()
    elif modelName == 'MC3':
        # model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = model.parameters()
        model = models.video.mc3_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()

    elif modelName == 'I3D':
        model = I3D.InceptionI3d(num_classes=157)
        load_file = 'rgb_charades.pt'
        model = model.to(device)
        model.load_state_dict(torch.load(load_file))
        model.replace_logits(num_classes=2)
        train_params = model.parameters()

    else:
        print('Unknown model name: {}'.format(modelName))
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss(weight=torch.tensor(
        [1.0 / 375,
         1.0 / 4388]))  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=10,
        gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs
    #sampler = torch.utils.data.WeightedRandomSampler([1.0/212, 1.0/4388], 8, replacement=True)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        #checkpoint = torch.load(os.path.join(save_dir, 'checkpoints', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
        #               map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        #print("Initializing weights from: {}...".format(
        #    os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        checkpoint = torch.load(
            'run\\run_10\\models\\I3D-celeb-df_epoch-19.pth.tar',
            map_location=lambda storage, loc: storage)

        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])
        print("Chekpoint loaded")

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(
        save_dir, 'models',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16,
                                               preprocess=False),
                                  batch_size=8,
                                  shuffle=True,
                                  num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16),
                                batch_size=8,
                                num_workers=4,
                                shuffle=True)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                              split='test',
                                              clip_len=16),
                                 batch_size=8,
                                 num_workers=4,
                                 shuffle=True)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {
        x: len(trainval_loaders[x].dataset)
        for x in ['train', 'val']
    }
    test_size = len(test_dataloader.dataset)

    training_loss_history = []
    val_loss_history = []
    for epoch in range(resume_epoch, num_epochs):
        #each epoch has a training and validation step
        # for phase in ['train', 'val']:
        #     start_time = timeit.default_timer()

        #     # reset the running loss and corrects
        #     running_loss = 0.0
        #     running_corrects = 0.0

        #     # set model to train() or eval() mode depending on whether it is trained
        #     # or being validated. Primarily affects layers such as BatchNorm or Dropout.
        #     if phase == 'train':
        #         # scheduler.step() is to be called once every epoch during training
        #         scheduler.step()
        #         model.train()
        #     else:
        #         model.eval()

        #     for inputs, labels in tqdm(trainval_loaders[phase]):
        #         # move inputs and labels to the device the training is taking place on
        #         inputs = Variable(inputs, requires_grad=True).to(device)
        #         labels = Variable(labels).to(device)
        #         optimizer.zero_grad()

        #         if phase == 'train':
        #             outputs = model(inputs)

        #         else:
        #             with torch.no_grad():
        #                 outputs = model(inputs)

        #         probs = nn.Softmax(dim=1)(outputs)
        #         preds = torch.max(probs, 1)[1]

        #         loss = criterion(outputs, labels.type(torch.long))

        #         if phase == 'train':
        #             loss.backward()
        #             #torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        #             optimizer.step()
        #             training_loss_history.append(loss.item())
        #         else:
        #             val_loss_history.append(loss.item())

        #         running_loss += loss.item() * inputs.size(0)
        #         running_corrects += torch.sum(preds == labels.data)
        #         #print("Running loss: ", running_loss)
        #         #print("Running corrects: ", running_corrects)

        #     epoch_loss = running_loss / trainval_sizes[phase]
        #     epoch_acc = running_corrects.double() / trainval_sizes[phase]

        #     if phase == 'train':
        #         writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
        #         writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
        #     else:
        #         writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
        #         writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

        #     save_loss(training_loss_history, val_loss_history)

        #     print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
        #     stop_time = timeit.default_timer()
        #     print("Execution time: " + str(stop_time - start_time) + "\n")

        # if epoch % save_epoch == (save_epoch - 1):
        #     torch.save({
        #         'epoch': epoch + 1,
        #         'state_dict': model.state_dict(),
        #         'opt_dict': optimizer.state_dict(),
        #     }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
        #     print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            cat_probs = None
            cat_labels = None

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.type(torch.long))

                if cat_probs is not None:
                    cat_probs = torch.cat((cat_probs, probs), dim=0)
                    cat_labels = torch.cat((cat_labels, labels), dim=0)
                else:
                    cat_probs = probs
                    cat_labels = labels

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            save_roc_curve(cat_labels, cat_probs)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
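
# NOTE (added): save_roc_curve is called above but not defined in this snippet.
# A minimal sketch of what it could look like, assuming a binary classifier
# (positive class at index 1) and scikit-learn/matplotlib being available; the
# output filename 'roc_curve.png' is a hypothetical choice:
from sklearn.metrics import auc, roc_curve
import matplotlib.pyplot as plt

def save_roc_curve(labels, probs, out_path='roc_curve.png'):
    # move tensors to CPU numpy; use P(class == 1) as the score
    y_true = labels.detach().cpu().numpy()
    y_score = probs.detach().cpu().numpy()[:, 1]
    fpr, tpr, _ = roc_curve(y_true, y_score)
    plt.figure()
    plt.plot(fpr, tpr, label='AUC = %.4f' % auc(fpr, tpr))
    plt.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend()
    plt.savefig(out_path)
    plt.close()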
Exemplo n.º 16
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/ferryboat_labels.txt', 'r') as f:
        class_names = f.readlines()
    # init model
    num_classes = 4
    modelName = 'STP'
    if modelName == 'I3D':
        model = I3D_model.InceptionI3d(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        size = (171, 128)
        crop_size = 112
    elif modelName == 'P3D':
        model = p3d_model.P3D63(num_classes=num_classes)
        size = (176, 210)
        crop_size = 160
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes,
                                        layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'STP':
        model = STP_model.STP(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224

    checkpoint = torch.load('./models/I3D-ferryboat4_epoch-199.pth.tar',
                            map_location=lambda storage, loc: storage)
    # partial restore: the checkpoint file is named I3D-* while modelName is
    # 'STP' here, so only parameters whose names match the current model's
    # state_dict are kept
    model_dict = model.state_dict()
    checkpoint_load = {
        k: v
        for k, v in checkpoint['state_dict'].items() if k in model_dict
    }
    model_dict.update(checkpoint_load)
    model.load_state_dict(model_dict)

    model.to(device)
    model.eval()

    for root, dirs, files in os.walk('./VAR/ferryboat/test/'):

        # per-class clip totals and per-class correct counts
        counts = {'Inshore': 0, 'Neg': 0, 'Offshore': 0, 'Traffic': 0}
        correct = {'Inshore': 0, 'Neg': 0, 'Offshore': 0, 'Traffic': 0}

        if len(dirs) > 4:
            video_name = dirs
            for name in video_name:
                class_name = name.split('_')[1]
                video = './ferryboat/' + class_name + "/" + name + '.avi'
                clip = []
                cap = cv2.VideoCapture(video)
                retaining = True
                while retaining:
                    retaining, frame = cap.read()
                    if not retaining and frame is None:
                        continue

                    tmp_ = center_crop(cv2.resize(frame, size), crop_size)
                    tmp = tmp_

                    clip.append(tmp)
                    if len(clip) == 16:
                        inputs = np.array(clip).astype(np.float32)
                        inputs = np.expand_dims(inputs, axis=0)
                        inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                        inputs = torch.from_numpy(inputs).to(device)
                        with torch.no_grad():
                            if modelName == 'STP':
                                outputs, index = model.forward(inputs)
                            else:
                                outputs = model.forward(inputs)
                        if modelName == 'STP':
                            # the attention index is only returned by STP
                            iii = index.cpu().data
                        probs = torch.nn.Softmax(dim=1)(outputs)
                        label = torch.max(probs,
                                          1)[1].detach().cpu().numpy()[0]
                        if modelName == 'I3D':
                            label = int(label[0])
                        pre = class_names[label].split(' ')[1][:-1]
                        counts[class_name] += 1
                        # as in the original logic, Neg and Traffic clips are
                        # counted as correct regardless of the prediction
                        if pre == class_name or class_name in ('Neg', 'Traffic'):
                            correct[class_name] += 1

                        clip.pop(0)

                    cv2.waitKey(30)

                cap.release()
                cv2.destroyAllWindows()
            # per-class accuracy (the original printed only the last class seen)
            for cls in counts:
                if counts[cls] > 0:
                    print(cls + '_acc:' + str(correct[cls] / counts[cls]))
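
# NOTE (added): center_crop is used above but not defined in this snippet. A
# minimal sketch, assuming it takes a square crop of side `crop_size` from the
# centre of the resized frame:
def center_crop(frame, crop_size):
    h, w = frame.shape[:2]
    top = (h - crop_size) // 2
    left = (w - crop_size) // 2
    return frame[top:top + crop_size, left:left + crop_size, :]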
Exemplo n.º 17
    def run(self, neuron_list):
        named_children = list(self.mdl.named_children())
        for n, c in named_children:
            print(f"Now visualising filters in {n}")





if __name__ == '__main__':
    sys.path.append('/home/hileyl/scratch/Projects/PyTorch/pytorch-video-recognition/')
    from network import C3D_model
    import collections

    mdl = C3D_model.C3D(101)
    if hasattr(mdl, "module"):
        module = mdl.module
    else:
        module = mdl

    state = torch.load('save_20.pth')
    # strip the 'module.' prefix that nn.DataParallel prepends to parameter names
    n_state = collections.OrderedDict(
        (k[7:] if k.startswith('module.') else k, v)
        for k, v in state['state_dict'].items())
    mdl.load_state_dict(n_state)
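
# NOTE (added): how the visualiser above would presumably be driven once the
# weights are restored; the wrapper class name and neuron indices are
# hypothetical:
#   vis = FilterVisualiser(mdl)
#   vis.run(neuron_list=[0, 5, 9])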
Exemplo n.º 18
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(3, 4, 6, 3))
        # model = resnet.ResNet(num_classes=num_classes, layers=(3, 4, 6, 3), sample_size=112, sample_duration=16)
        train_params = model.parameters()
    elif modelName == 'R2D':
        model = R2Dnet.R2DClassifier(group_num_classes=num_classes, pretrained=True)
        # model = resnet.ResNet(num_classes=num_classes, layers=(3, 4, 6, 3), sample_size=112, sample_duration=16)
        train_params = model.parameters()
    else:
        print('We only implemented C3D, R2Plus1D, R3D and R2D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    model.to(device)  # moved up here because resuming requires parameters on the device

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                       map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    # model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='train',clip_len=16), batch_size=4, shuffle=True, \
                                  num_workers=0)
    val_dataloader   = DataLoader(VolleyballDataset(dataset=dataset, split='val',  clip_len=16), batch_size=4, num_workers=0)
    test_dataloader  = DataLoader(VolleyballDataset(dataset=dataset, split='test', clip_len=16), batch_size=4, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            torch.backends.cudnn.benchmark = False
            # for inputs, bbox_inputs, labels, adjacent_matrix in tqdm(trainval_loaders[phase]):
            # for inputs, labels in tqdm(trainval_loaders[phase]):
            for inputs, labels, dists in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                inputs = Variable(inputs, requires_grad=True).to(device)
                # bbox_inputs = Variable(bbox_inputs, requires_grad=True).to(device)
                # adjacent_matrix = Variable(adjacent_matrix, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                dists = Variable(dists, requires_grad=True).to(device)
                # dist_num = Variable(dist_num).to(device)
                optimizer.zero_grad()
                if phase == 'train':
                    outputs = model(inputs, dists)
                    # outputs = model(inputs, bbox_inputs, adjacent_matrix)
                else:
                    with torch.no_grad():
                        # outputs = model(inputs, bbox_inputs, adjacent_matrix)
                        outputs = model(inputs, dists)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)
                print("labels",labels)
                print("outputs",outputs)
                print("loss",loss)

                torch.backends.cudnn.benchmark = False
                if phase == 'train':
                    loss.backward(retain_graph=True)
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            print(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            # the test dataloader yields (inputs, labels, dists), matching the
            # training loop above
            for inputs, labels, dists in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)
                dists = dists.to(device)

                with torch.no_grad():
                    outputs = model(inputs, dists)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
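
# NOTE (added): C3D_model.get_1x_lr_params / get_10x_lr_params, used in several
# of these scripts, split the parameters into two optimizer groups so the
# pretrained backbone trains at the base lr and the freshly initialised
# classifier at 10x that. A minimal sketch, assuming the final layer is an
# attribute named `fc8` (the attribute name is an assumption):
def get_1x_lr_params(model):
    # everything except the final classifier
    for name, param in model.named_parameters():
        if not name.startswith('fc8'):
            yield param

def get_10x_lr_params(model):
    # the final classifier only, trained at 10x the base learning rate
    for name, param in model.named_parameters():
        if name.startswith('fc8'):
            yield param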
Exemplo n.º 19
def train_model(dataset=dataset,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{
            'params': C3D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': C3D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]

    elif modelName == 'resnet':
        model = resnet.generate_model(model_depth=50,
                                      n_classes=700,
                                      n_input_channels=3,
                                      shortcut_type='B',
                                      conv1_t_size=7,
                                      conv1_t_stride=1,
                                      no_max_pool=False,
                                      widen_factor=1.0)
        model = resnet.load_pretrained_model(model,
                                             'network/r3d50_K_200ep.pth',
                                             modelName, num_classes)
        train_params = resnet.get_fine_tuning_parameters(model, 'fc')

    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=10,
        gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    print("Training {} from scratch...".format(modelName))

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16),
                                  batch_size=20,
                                  shuffle=True,
                                  num_workers=4)

    train_size = len(train_dataloader.dataset)

    for epoch in range(0, num_epochs):
        start_time = timeit.default_timer()

        # reset the running loss and corrects
        running_loss = 0.0
        running_corrects = 0.0

        # scheduler.step() is to be called once every epoch during training
        scheduler.step()
        model.train()

        for inputs, labels in tqdm(train_dataloader):
            # move inputs and labels to the device the training is taking place on
            inputs = Variable(inputs, requires_grad=True).to(device)
            labels = Variable(labels).to(device)
            optimizer.zero_grad()

            outputs = model(inputs)

            probs = nn.Softmax(dim=1)(outputs)
            preds = torch.max(probs, 1)[1]
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects.double() / train_size

        print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
            phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
        stop_time = timeit.default_timer()
        print("Execution time: " + str(stop_time - start_time) + "\n")

        # save inside the epoch loop so a checkpoint is written every
        # `save_epoch` epochs, not only once after training finishes
        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                }, str(epoch) + '.pth')
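
# NOTE (added): a minimal sketch of resuming from a checkpoint saved in the
# format above ('epoch' / 'state_dict' / 'opt_dict'); the filename '19.pth' is
# a hypothetical example, and `model` / `optimizer` are assumed to be built as
# in train_model:
ckpt = torch.load('19.pth', map_location='cpu')
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['opt_dict'])
start_epoch = ckpt['epoch']  # training would continue from this epoch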
Exemplo n.º 20
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    
    else:
        print('We only implemented C3D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                       map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train',clip_len=16), batch_size=1, shuffle=True, num_workers=0)
    val_dataloader   = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=16), batch_size=1, num_workers=0)
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=1, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)

                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                labels = labels.long()
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        

    writer.close()
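
# NOTE (added): PyTorch >= 1.1 expects optimizer.step() to be called before
# scheduler.step(); the scripts above step the scheduler at the top of each
# epoch, which emits a warning on newer versions. The recommended per-epoch
# layout:
#   for epoch in range(num_epochs):
#       for inputs, labels in train_dataloader:  # optimizer.step() per batch
#           ...
#       scheduler.step()                         # then advance the schedule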
Exemplo n.º 21
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot,
                useTest=useTest,
                test_interval=nTestInterval):
    # 1. Build the model
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    train_params = [{
        'params': C3D_model.get_1x_lr_params(model),
        'lr': lr
    }, {
        'params': C3D_model.get_10x_lr_params(model),
        'lr': lr * 10
    }]

    # 2. Loss function
    criterion = nn.CrossEntropyLoss()
    # 3. Optimizer
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # 4. Learning-rate schedule: divide the lr by 10 every 10 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    model.cuda()
    criterion.cuda()

    # 6. Set up the training, validation and test dataloaders
    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16),
                                  batch_size=4,
                                  shuffle=True,
                                  num_workers=0)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16),
                                batch_size=4,
                                num_workers=0)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                              split='test',
                                              clip_len=16),
                                 batch_size=4,
                                 num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {
        x: len(trainval_loaders[x].dataset)
        for x in ['train', 'val']
    }
    test_size = len(test_dataloader.dataset)

    # 8. Start training
    for epoch in range(0, num_epochs):
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()
            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            if phase == 'train':
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                inputs = inputs.cuda()
                labels = labels.cuda()
                optimizer.zero_grad()
                # choose the forward mode according to the phase
                if phase == 'train':
                    outputs = model(inputs)
                else:
                    # disable gradient tracking during validation
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                # training phase: backpropagate and update the weights
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        # save a checkpoint
        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        # run the test set every test_interval epochs
        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.cuda()
                labels = labels.cuda()

                with torch.no_grad():
                    outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")