def loadMSVD():
    """Load the MSVD video-description dataset and store it as a pickled Dataset.

    Builds a Dataset whose outputs are tokenized text descriptions and whose
    inputs are videos (train/val/test splits), sets the training mean image,
    saves the dataset to disk, reloads it and fetches one sample batch as a
    sanity check. Side effects only; returns None.

    NOTE(review): this function is defined twice in this file; the later
    definition shadows this one at import time.
    """
    logging.info('Loading MSVD dataset')

    # Build basic dataset structure:
    # we assign it a name and the path where the images are stored.
    base_path = '/media/HDD_2TB/DATASETS/MSVD/'
    name = 'MSVD_VideoDescription'
    ds = Dataset(name, base_path)
    max_text_len = 35

    # Load the train, val and test splits of the descriptions (outputs).
    # The files include one description per line; in this dataset a variable
    # number of descriptions per video is provided. Vocabulary is built on the
    # training split only.
    ds.setOutput(base_path + 'train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True,
                 max_text_len=max_text_len)
    ds.setOutput(base_path + 'val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Load the associated videos (inputs).
    # Since each video has a different number of sentences, we pass
    # repeat_set=num_captions, where num_captions holds the number of captions
    # of each video, so inputs stay aligned with the outputs.
    num_captions_train = np.load(base_path + 'train_descriptions_counts.npy')
    num_captions_val = np.load(base_path + 'val_descriptions_counts.npy')
    num_captions_test = np.load(base_path + 'test_descriptions_counts.npy')
    ds.setInput([base_path + 'train_imgs_list.txt', base_path + 'train_imgs_counts.txt'],
                'train', type='video', id='videos', repeat_set=num_captions_train)
    ds.setInput([base_path + 'val_imgs_list.txt', base_path + 'val_imgs_counts.txt'],
                'val', type='video', id='videos', repeat_set=num_captions_val)
    ds.setInput([base_path + 'test_imgs_list.txt', base_path + 'test_imgs_counts.txt'],
                'test', type='video', id='videos', repeat_set=num_captions_test)

    # Set the dataset mean image for preprocessing the data.
    # FIX: keyword changed from id= to data_id= to match every other
    # setTrainMean call in this file (id= is the older API spelling).
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='videos')

    # We have finished loading the dataset; store it for future use.
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line.
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Recover the first batch of data as a sanity check.
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadMSVD():
    """Build, save and reload the MSVD video-description Dataset, then fetch one batch.

    Outputs are tokenized captions (variable count per video); inputs are the
    videos, repeated per-caption so the two stay aligned. Side effects only.
    """
    logging.info('Loading MSVD dataset')

    # Basic dataset structure: a name plus the path where the images live.
    base_path = '/media/HDD_2TB/DATASETS/MSVD/'
    name = 'MSVD_VideoDescription'
    ds = Dataset(name, base_path)
    max_text_len = 35

    # Description outputs: one caption per line, a variable number of captions
    # per video. The vocabulary is built from the training split only.
    for split in ('train', 'val', 'test'):
        vocab_kwargs = {'build_vocabulary': True} if split == 'train' else {}
        ds.setOutput(base_path + split + '_descriptions.txt', split,
                     type='text', id='descriptions',
                     tokenization='tokenize_basic',
                     max_text_len=max_text_len, **vocab_kwargs)

    # Video inputs: repeat_set carries the per-video caption counts so that
    # each video appears once per caption.
    for split in ('train', 'val', 'test'):
        caption_counts = np.load(base_path + split + '_descriptions_counts.npy')
        ds.setInput([base_path + split + '_imgs_list.txt',
                     base_path + split + '_imgs_counts.txt'],
                    split, type='video', id='videos',
                    repeat_set=caption_counts)

    # Mean image used when preprocessing the video frames.
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='videos')

    # Persist the dataset, then reload it and pull one batch as a sanity check.
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFood101():
    """Build, save and reload the Food101 image-classification Dataset, then fetch one batch.

    Inputs are image lists, outputs are categorical labels with named classes.
    Side effects only; returns None.
    """
    logging.info('Loading Food101 dataset')
    logging.info(
        'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it form https://www.vision.ee.ethz.ch/datasets_extra/food-101/'
    )

    base_path = '../data/Food101/'
    name = 'Food101'
    ds = Dataset(name, base_path + 'images')

    # Image inputs; only the training split specifies a crop size.
    ds.setInput(base_path + 'meta/train_split.txt', 'train',
                type='image', id='images', img_size_crop=[227, 227, 3])
    ds.setInput(base_path + 'meta/val_split.txt', 'val',
                type='image', id='images')
    ds.setInput(base_path + 'meta/test.txt', 'test',
                type='image', id='images')

    # Categorical label outputs, one file per split.
    for split in ('train', 'val', 'test'):
        ds.setOutput(base_path + 'meta/' + split + '_labels.txt', split,
                     type='categorical', id='labels')

    # Human-readable class names for the label output.
    ds.setClasses(base_path + 'meta/classes.txt', 'labels')

    # Mean image used when preprocessing the inputs.
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # Persist the dataset, then reload it and pull one batch as a sanity check.
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFlickr8k():
    """Load the Flickr8k image-description dataset and store it as a pickled Dataset.

    Outputs are tokenized captions (exactly 5 consecutive captions per image);
    inputs are the images, repeated 5 times each so the splits stay aligned.
    Saves, reloads, and fetches one batch as a sanity check. Side effects only.

    NOTE(review): this function is defined twice in this file; the later
    definition shadows this one at import time.
    """
    logging.info('Loading Flickr8k dataset')

    # Build basic dataset structure:
    # we assign it a name and the path where the images are stored.
    base_path = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    ds = Dataset(name, base_path + 'Flicker8k_Dataset')
    max_text_len = 35

    # Load the train, val and test splits of the descriptions (outputs).
    # The files include one description per line, and each run of 5 consecutive
    # descriptions corresponds to a single input image. Vocabulary is built on
    # the training split only.
    ds.setOutput(base_path + 'text/train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True,
                 max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Load the associated images (inputs).
    # Since this dataset has 5 sentences per image, repeat_set=5 keeps the
    # image list aligned with the caption list.
    ds.setInput(base_path + 'text/Flickr_8k.trainImages.txt', 'train',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.devImages.txt', 'val',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.testImages.txt', 'test',
                type='image', id='images', repeat_set=5)

    # Set the dataset mean image for preprocessing the data.
    # FIX: keyword changed from id= to data_id= to match every other
    # setTrainMean call in this file (id= is the older API spelling).
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # We have finished loading the dataset; store it for future use.
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line.
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Recover the first batch of data as a sanity check.
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFlickr8k():
    """Load the Flickr8k image-description dataset and store it as a pickled Dataset.

    Outputs are tokenized captions (exactly 5 consecutive captions per image);
    inputs are the images, repeated 5 times each so the splits stay aligned.
    Saves, reloads, and fetches one batch, logging the batch sizes. Side
    effects only; returns None.
    """
    logging.info('Loading Flickr8k dataset')

    # Build basic dataset structure:
    # we assign it a name and the path where the images are stored.
    base_path = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    ds = Dataset(name, base_path + 'Flicker8k_Dataset')
    max_text_len = 35

    # Load the train, val and test splits of the descriptions (outputs).
    # The files include one description per line, and each run of 5 consecutive
    # descriptions corresponds to a single input image. Vocabulary is built on
    # the training split only.
    ds.setOutput(base_path + 'text/train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True,
                 max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Load the associated images (inputs).
    # Since this dataset has 5 sentences per image, repeat_set=5 keeps the
    # image list aligned with the caption list.
    ds.setInput(base_path + 'text/Flickr_8k.trainImages.txt', 'train',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.devImages.txt', 'val',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.testImages.txt', 'test',
                type='image', id='images', repeat_set=5)

    # Set the dataset mean image for preprocessing the data.
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # We have finished loading the dataset; store it for future use.
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line.
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Recover the first batch of data as a sanity check.
    [X, Y] = ds.getXY('train', 10)
    # FIX: the format string was broken across a raw line break inside the
    # single-quoted literal (a syntax error); rejoined into one literal.
    logging.info('Sample data loaded correctly. %d input samples. %d output samples'
                 % (len(X), len(Y)))
def loadFood101():
    """Build, save and reload the Food101 image-classification Dataset, then fetch one batch.

    Image lists are the inputs and categorical labels (with named classes) the
    outputs. Side effects only; returns None.
    """
    logging.info('Loading Food101 dataset')
    logging.info(
        'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it form https://www.vision.ee.ethz.ch/datasets_extra/food-101/')

    base_path = '../data/Food101/'
    name = 'Food101'
    ds = Dataset(name, base_path + 'images')

    # Image inputs, one list file per split; the training split also fixes the
    # crop size used during preprocessing.
    split_files = [('train', 'meta/train_split.txt', {'img_size_crop': [227, 227, 3]}),
                   ('val', 'meta/val_split.txt', {}),
                   ('test', 'meta/test.txt', {})]
    for split, rel_path, extra in split_files:
        ds.setInput(base_path + rel_path, split,
                    type='image', id='images', **extra)

    # Categorical label outputs, one file per split, plus the class-name list.
    ds.setOutput(base_path + 'meta/train_labels.txt', 'train',
                 type='categorical', id='labels')
    ds.setOutput(base_path + 'meta/val_labels.txt', 'val',
                 type='categorical', id='labels')
    ds.setOutput(base_path + 'meta/test_labels.txt', 'test',
                 type='categorical', id='labels')
    ds.setClasses(base_path + 'meta/classes.txt', 'labels')

    # Mean image used when preprocessing the inputs.
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # Persist the dataset, then reload it and pull one batch as a sanity check.
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')