def main_iit_v2c():
    # Parameters
    config = FEConfig()
    model_names = ['resnet50']
    annotation_files = ['train.txt', 'test.txt']

    for annotation_file in annotation_files:
        annotations = iit_v2c.load_annotations(config.DATASET_PATH, annotation_file)

        # Get torch.dataset object
        clips, targets, vocab, config = iit_v2c.parse_dataset(config,
                                                              annotation_file,
                                                              numpy_features=False)
        config.display()

        transform = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                             std=[0.229, 0.224, 0.225])])
        image_dataset = iit_v2c.FeatureDataset(clips,
                                               targets,
                                               numpy_features=False,
                                               transform=transform)

        for model_name in model_names:
            extract(config.DATASET_PATH, image_dataset, model_name)
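# For reference, a minimal sketch of what a feature-extraction helper in the
# spirit of extract() could look like. It assumes the dataset yields
# (frames, target, clip_name) items with frames shaped (N_FRAMES, 3, 224, 224)
# and saves per-clip features as .npy files under DATASET_PATH/<model_name>/.
# The name extract_sketch and this output layout are illustrative assumptions,
# not the repo's actual implementation.
import os
import numpy as np
import torch
import torchvision.models as models

def extract_sketch(dataset_path, image_dataset, model_name='resnet50'):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Pretrained backbone with the final classification layer removed
    backbone = getattr(models, model_name)(pretrained=True)
    backbone = torch.nn.Sequential(*list(backbone.children())[:-1]).to(device).eval()

    out_dir = os.path.join(dataset_path, model_name)
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for frames, _, clip_name in image_dataset:
            feats = backbone(frames.to(device))              # (N_FRAMES, 2048, 1, 1) for resnet50
            feats = feats.flatten(start_dim=1).cpu().numpy()
            np.save(os.path.join(out_dir, clip_name + '.npy'), feats)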
class TestConfig(Config):  # base Config class assumed from the repo's config module
    NAME = 'v2c_IIT-V2C'
    MODE = 'test'
    ROOT_DIR = ROOT_DIR
    CHECKPOINT_PATH = os.path.join(ROOT_DIR, 'checkpoints')
    DATASET_PATH = os.path.join(ROOT_DIR, 'datasets', 'IIT-V2C')
    MAXLEN = 10

# Setup configuration class
config = TestConfig()

# Setup torch dataset object
vocab = pickle.load(open(os.path.join(config.CHECKPOINT_PATH, 'vocab.pkl'), 'rb'))
annotation_file = config.MODE + '.txt'
clips, targets, _, config = iit_v2c.parse_dataset(config,
                                                  annotation_file,
                                                  vocab=vocab)
test_dataset = iit_v2c.FeatureDataset(clips, targets)
test_loader = data.DataLoader(test_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=config.WORKERS)
config.display()

# Set up and build the Video2Command model for inference
v2c_model = Video2Command(config)
v2c_model.build()

# Safely create the prediction dir if it does not exist
if not os.path.exists(os.path.join(config.CHECKPOINT_PATH, 'prediction')):
    os.makedirs(os.path.join(config.CHECKPOINT_PATH, 'prediction'))
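# A hedged sketch of how predicted token ids could be turned back into command
# strings before writing results to the prediction directory. The idx2word
# attribute and the '<sos>'/'<eos>'/'<pad>' token names are assumptions about
# this vocabulary class, not confirmed by the snippet above.
def decode_caption_sketch(token_ids, vocab):
    words = []
    for idx in token_ids:
        word = vocab.idx2word[int(idx)]
        if word == '<eos>':
            break
        if word not in ('<sos>', '<pad>'):
            words.append(word)
    return ' '.join(words)

# Usage (illustrative): command = decode_caption_sketch(predicted_ids, vocab)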
""" NAME = 'v2c_IIT-V2C' MODE = 'train' ROOT_DIR = ROOT_DIR CHECKPOINT_PATH = os.path.join(ROOT_DIR, 'checkpoints') DATASET_PATH = os.path.join(ROOT_DIR, 'datasets', 'IIT-V2C') MAXLEN = 10 # Test configuration config = TrainConfig() config.display() print() # Test parse_dataset annotation_file = config.MODE + '.txt' clips, targets, vocab, config = iit_v2c.parse_dataset(config, annotation_file, numpy_features=False) config.display() print('Vocabulary:') print(vocab.word2idx) print('length ("<pad>" included):', len(vocab)) print('dataset:', len(clips), len(targets)) print() transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor()]) train_dataset = iit_v2c.FeatureDataset(clips, targets, numpy_features=False, transform=transform)
"""Configuration for training with IIT-V2C. """ NAME = 'v2c_IIT-V2C' MODE = 'train' CHECKPOINT_PATH = os.path.join(ROOT_DIR, 'checkpoints') DATASET_PATH = os.path.join(ROOT_DIR, 'datasets', 'IIT-V2C') MAXLEN = 10 # Test configuration config = TrainConfig() config.display() print() # Test parse_dataset annotation_file = config.MODE + '.txt' clips, targets, vocab, config = iit_v2c.parse_dataset(config, annotation_file) config.display() print('Vocabulary:') print(vocab.word2idx) print('length ("<#SPECIAL>" included):', len(vocab)) print('dataset:', len(clips), len(targets)) print() train_dataset = iit_v2c.FeatureDataset(clips, targets) train_loader = data.DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=config.WORKERS) # Test torch dataloader object for i, (Xv, S, clip_name) in enumerate(train_loader):