def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--name')
    parser.add_argument('--folder_outputs')
    parser.add_argument('--occlusion', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--folder_downloads', default='downloads')
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--image_height', type=int, default=48)
    parser.add_argument('--image_width', type=int, default=48)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    if not os.path.exists(args.folder_outputs):
        os.mkdir(args.folder_outputs)
    # Elements
    # download=True added to match the sibling scripts, so the first run does
    # not fail when MNIST has not been fetched yet.
    mnist = {
        phase: torchvision.datasets.MNIST(args.folder_downloads, train=train,
                                          download=True)
        for phase, train in zip(['train', 'test'], [True, False])
    }
    elements = {
        key: [create_element(np.array(n[0])) for n in val]
        for key, val in mnist.items()
    }
    # Background element: zero intensity, mask channel set to one everywhere.
    back = np.zeros((args.image_height, args.image_width, 2))
    back[..., -1] = 1
    # Validation reuses the MNIST training digits; test uses the test digits.
    elements = {
        key: {'back': back, 'objects': elements[key_prev]}
        for key, key_prev in zip(['train', 'valid', 'test'],
                                 ['train', 'train', 'test'])
    }
    # Objects
    objects = generate_objects(args, elements)
    create_dataset(os.path.join(args.folder_outputs, args.name), objects)
    return
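# create_element is not shown in these excerpts. Below is a minimal sketch of
# a helper with a compatible return shape, assuming channel 0 holds the
# normalized digit intensity and channel 1 a thresholded binary mask (both
# assumptions, not the source implementation):
def create_element(image):
    intensity = image.astype(np.float32) / 255.0
    mask = (intensity > 0.5).astype(np.float32)
    return np.stack([intensity, mask], axis=-1)  # shape: (height, width, 2)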
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--name', default='mnist')
    parser.add_argument('--folder_downloads', default='downloads')
    parser.add_argument('--num_objects', type=int, default=2)
    parser.add_argument('--image_height', type=int, default=48)
    parser.add_argument('--image_width', type=int, default=48)
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=36520)
    args = parser.parse_args()
    # Elements
    if not os.path.exists(args.folder_downloads):
        os.mkdir(args.folder_downloads)
    mnist = {
        phase: torchvision.datasets.MNIST(args.folder_downloads, train=train,
                                          download=True)
        for phase, train in zip(['train', 'test'], [True, False])
    }
    elements = {
        key: [create_element(np.array(n[0])) for n in val]
        for key, val in mnist.items()
    }
    # Each item holds num_objects foreground layers plus one background layer.
    item_shape = [args.num_objects + 1, args.image_height, args.image_width,
                  elements['train'][0].shape[-1]]
    objects = {
        'train': np.empty((args.num_train, *item_shape), dtype=np.float32),
        'valid': np.empty((args.num_valid, *item_shape), dtype=np.float32),
        'test': np.empty((args.num_test, *item_shape), dtype=np.float32),
    }
    # Datasets (20 and 500 digit variants)
    for num_variants in [20, 500]:
        np.random.seed(args.seed)
        for key in ['train', 'valid', 'test']:
            for idx in range(objects[key].shape[0]):
                objects[key][idx] = generate_objects(
                    elements['train'][:num_variants], args.image_height,
                    args.image_width, args.num_objects)
        create_dataset('{}_{}'.format(args.name, num_variants), objects)
    # Datasets (all digit variants)
    np.random.seed(args.seed)
    for key in ['train', 'valid']:
        for idx in range(objects[key].shape[0]):
            objects[key][idx] = generate_objects(elements['train'],
                                                 args.image_height,
                                                 args.image_width,
                                                 args.num_objects)
    key = 'test'
    for idx in range(objects[key].shape[0]):
        objects[key][idx] = generate_objects(elements['test'],
                                             args.image_height,
                                             args.image_width,
                                             args.num_objects)
    create_dataset('{}_all'.format(args.name), objects)
    return
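# generate_objects is also undefined here, and its signature differs between
# scripts (some pass args and an elements dict, this one passes a list). For
# the list-based call above, a sketch of one compatible implementation — an
# assumption, not the source code: paste randomly chosen elements at random
# offsets on empty layers and append an all-mask background layer last.
def generate_objects(elements, image_height, image_width, num_objects):
    num_channels = elements[0].shape[-1]
    layers = np.zeros(
        (num_objects + 1, image_height, image_width, num_channels),
        dtype=np.float32)
    layers[-1, ..., -1] = 1  # background layer: full mask, zero intensity
    for layer in layers[:-1]:
        element = elements[np.random.randint(len(elements))]
        row = np.random.randint(image_height - element.shape[0] + 1)
        col = np.random.randint(image_width - element.shape[1] + 1)
        layer[row:row + element.shape[0], col:col + element.shape[1]] = element
    return layers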
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--name')
    parser.add_argument('--folder_outputs')
    parser.add_argument('--occlusion', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--image_height', type=int, default=48)
    parser.add_argument('--image_width', type=int, default=48)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    if not os.path.exists(args.folder_outputs):
        os.mkdir(args.folder_outputs)
    # Elements
    square = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    triangle = np.array(
        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0],
         [0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0],
         [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
         [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    # The third element is the triangle flipped upside down.
    elements = [square, triangle, triangle[::-1, :].copy()]
    # Add a trailing channel axis duplicated as (intensity, mask).
    elements = [n[..., None].repeat(2, axis=-1) for n in elements]
    back = np.zeros((args.image_height, args.image_width, 2))
    back[..., -1] = 1
    elements = {'back': back, 'objects': elements}
    # The same shapes are used for all three splits.
    elements = {key: elements for key in ['train', 'valid', 'test']}
    # Objects
    objects = generate_objects(args, elements)
    create_dataset(os.path.join(args.folder_outputs, args.name), objects)
    return
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_outputs')
    parser.add_argument('--occlusion', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--image_size', type=int, default=48)
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    # Shapes
    square = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    triangle = np.array(
        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0],
         [0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0],
         [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0],
         [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    # The third element is the triangle flipped upside down.
    images_all = [square, triangle, triangle[::-1, :].copy()]
    data = {key: images_all for key in ['train', 'valid', 'test']}
    # Create dataset
    images, labels_ami, labels_mse = generate_dataset(args, data)
    name = 'shapes_{}'.format('_'.join([str(n) for n in args.num_objects_all]))
    create_dataset(os.path.join(args.folder_outputs, name), images,
                   labels_ami, labels_mse)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'multi_dsprites',
                           'multi_dsprites_colored_on_colored.tfrecords')
    dataset = multi_dsprites.dataset(path_in, 'colored_on_colored',
                                     map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'dsprites')
    with tf.Session() as sess:
        data = sess.run(next_batch)
    create_dataset(data, path_out, args.num_parallel)
    return
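# get_next_batch is undefined in these excerpts. A plausible TF1-style helper
# (a sketch under that assumption, not the source code): take num_data
# examples from the tf.data pipeline and batch them into a single fetchable
# dictionary of tensors.
def get_next_batch(dataset, num_data):
    dataset = dataset.take(num_data).batch(num_data)
    iterator = dataset.make_one_shot_iterator()  # TF1 graph-mode iterator
    return iterator.get_next()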
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_inputs')
    parser.add_argument('--folder_outputs')
    parser.add_argument('--name')
    parser.add_argument('--dependence', type=int)
    parser.add_argument('--only_object', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--color_divs', type=int, default=2)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    # Load previous dataset
    name = '{}_{}'.format(args.name,
                          '_'.join([str(n) for n in args.num_objects_all]))
    images, labels_ami, labels_mse = load_dataset(args.folder_inputs, name)
    # Allocate 3-channel (RGB) versions of the grayscale images and masks.
    images_new = {
        key: np.empty((val.shape[0], 3, *val.shape[2:]), dtype=val.dtype)
        for key, val in images.items()
    }
    labels_mse_new = {
        key: np.empty((*val.shape[:2], 3, *val.shape[3:]), dtype=val.dtype)
        for key, val in labels_mse.items()
    }
    # Create new dataset
    colors = [convert_color(idx, args.color_divs)
              for idx in range(pow(args.color_divs, 3))]
    colors_compatible = [compute_colors_compatible(color_ref, colors)
                         for color_ref in colors]
    np.random.seed(args.seed)
    for key in labels_mse_new:
        for idx in range(labels_mse_new[key].shape[0]):
            if args.dependence:
                images_new[key][idx], labels_mse_new[key][idx] = add_color_dep(
                    images[key][idx], labels_mse[key][idx], colors,
                    colors_compatible, args.only_object)
            else:
                images_new[key][idx], labels_mse_new[key][idx] = add_color_ind(
                    images[key][idx], labels_mse[key][idx], colors,
                    colors_compatible, args.only_object)
    create_dataset(os.path.join(args.folder_outputs, name), images_new,
                   labels_ami, labels_mse_new)
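# convert_color and compute_colors_compatible are not shown. Below is a
# sketch of a convert_color consistent with the pow(color_divs, 3) indexing
# above — an assumption, not the source implementation: decode the flat index
# into base-color_divs digits, one per RGB channel, normalized to [0, 1].
def convert_color(idx, color_divs):
    channels = []
    for _ in range(3):
        channels.append(idx % color_divs)
        idx //= color_divs
    return np.array(channels, dtype=np.float32) / (color_divs - 1)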
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--name')
    parser.add_argument('--num_objects', type=int)
    parser.add_argument('--image_height', type=int)
    parser.add_argument('--image_width', type=int)
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    # Elements
    square = np.array([[1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 0, 0, 0, 1, 1],
                       [1, 1, 0, 0, 0, 1, 1],
                       [1, 1, 0, 0, 0, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1, 1, 1]])
    triangle = np.array([[0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
                         [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
                         [0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0],
                         [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0],
                         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    elements = [square, triangle, triangle[::-1, :].copy()]
    # Add a trailing channel axis duplicated as (intensity, mask).
    elements = [n[..., None].repeat(2, axis=-1) for n in elements]
    item_shape = [args.num_objects + 1, args.image_height, args.image_width,
                  elements[0].shape[-1]]
    objects = {
        'train': np.empty((args.num_train, *item_shape), dtype=np.float32),
        'valid': np.empty((args.num_valid, *item_shape), dtype=np.float32),
        'test': np.empty((args.num_test, *item_shape), dtype=np.float32),
    }
    # Datasets
    np.random.seed(args.seed)
    for key in ['train', 'valid', 'test']:
        for idx in range(objects[key].shape[0]):
            objects[key][idx] = generate_objects(elements, args.image_height,
                                                 args.image_width,
                                                 args.num_objects)
    create_dataset(args.name, objects)
    return
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=70000)
    parser.add_argument('--num_parallel', type=int, default=32)
    parser.add_argument('--split_objects', type=int, default=6)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in,
                                       map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out_all = {
        'train': os.path.join(args.folder_out, 'clevr_train'),
        'extra': os.path.join(args.folder_out, 'clevr_extra'),
    }
    with tf.Session() as sess:
        data_all = sess.run(next_batch)
    data_all['image'] = np.stack(
        [crop_and_resize(val) for val in data_all['image']])
    data_all['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data_all['mask']
    ])
    # Scenes with at most split_objects objects go to 'train', the rest to
    # 'extra'; slot 0 of the visibility vector is the background.
    num_objects = np.sum(data_all['visibility'][:, 1:], axis=1)
    sel_train = num_objects <= args.split_objects
    sel_extra = np.bitwise_not(sel_train)
    data_all = {
        'train': {key: val[sel_train] for key, val in data_all.items()},
        'extra': {key: val[sel_extra] for key, val in data_all.items()},
    }
    for phase, data in data_all.items():
        path_out = path_out_all[phase]
        create_dataset(data, path_out, args.num_parallel)
    return
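# crop_and_resize is undefined in these excerpts, and the exact crop window
# and output resolution are not given. A sketch of one common choice — all
# parameters below are assumptions: take a center square crop, then resize,
# using nearest-neighbour resampling for single-channel masks so they stay
# discrete.
from PIL import Image

def crop_and_resize(image, target_size=128):
    height, width = image.shape[:2]
    size = min(height, width)
    top, left = (height - size) // 2, (width - size) // 2
    cropped = np.squeeze(image[top:top + size, left:left + size])
    resample = Image.NEAREST if cropped.ndim == 2 else Image.BILINEAR
    resized = Image.fromarray(cropped).resize((target_size, target_size),
                                              resample)
    return np.array(resized)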
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    for split in ['train', 'empty_room', 'six_objects', 'identical_color']:
        if split == 'train':
            file_in = 'objects_room_{}.tfrecords'.format(split)
        else:
            file_in = 'objects_room_test_{}.tfrecords'.format(split)
        path_in = os.path.join(args.folder_in, 'objects_room', file_in)
        dataset = objects_room.dataset(path_in, split,
                                       map_parallel_calls=args.num_parallel)
        next_batch = get_next_batch(dataset, args.num_data)
        path_out = os.path.join(args.folder_out, 'room_{}'.format(split))
        with tf.Session() as sess:
            data = sess.run(next_batch)
        create_dataset(data, path_out, args.num_parallel)
    return
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--path_config')
    parser.add_argument('--folder_downloads')
    parser.add_argument('--folder_out')
    parser.add_argument('--seed', type=int, default=0)
    config = load_config(parser)
    # Background element: zero intensity with full (255) alpha.
    back = np.zeros((config['image_height'], config['image_width'], 2),
                    dtype=np.uint8)
    back[..., -1] = 255
    back_list = [back]
    mnist = [(convert_mnist(np.array(image)), cls)
             for train in [True, False]
             for (image, cls) in torchvision.datasets.MNIST(
                 config['folder_downloads'], train=train, download=True)]
    # Group the digit images by class label.
    num_classes = 10
    objects_list = [[] for _ in range(num_classes)]
    for image, cls in mnist:
        objects_list[cls].append(image)
    # Count of digit images in classes 0-4 (computed but not used below).
    cnt = 0
    for n in objects_list[:5]:
        cnt += len(n)
    create_dataset(config, back_list, objects_list, name='mnist')
    return
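# load_config is undefined here. Below is a sketch of a helper consistent
# with the usage above — an assumption, not the source code: parse the
# command line, read the JSON file named by --path_config, and let any
# explicitly given command-line values override the file.
import json

def load_config(parser):
    args = parser.parse_args()
    with open(args.path_config, 'r') as f:
        config = json.load(f)
    config.update({k: v for k, v in vars(args).items() if v is not None})
    return config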
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_downloads')
    parser.add_argument('--folder_outputs')
    parser.add_argument('--occlusion', type=int)
    parser.add_argument('--num_objects_all', type=int, nargs='+')
    parser.add_argument('--image_size', type=int, default=48)
    parser.add_argument('--num_train', type=int, default=50000)
    parser.add_argument('--num_valid', type=int, default=10000)
    parser.add_argument('--num_test', type=int, default=10000)
    parser.add_argument('--seed', type=int, default=265076)
    args = parser.parse_args()
    # Convert MNIST
    mnist = {
        phase: torchvision.datasets.MNIST(args.folder_downloads, train=train,
                                          transform=None,
                                          target_transform=None,
                                          download=True)
        for phase, train in zip(['train', 'test'], [True, False])
    }
    mnist = {
        key: [convert_mnist_image(np.array(n[0])) for n in val]
        for key, val in mnist.items()
    }
    # Validation reuses the MNIST training digits; test uses the test digits.
    data = {
        key: mnist[key_mnist]
        for key, key_mnist in zip(['train', 'valid', 'test'],
                                  ['train', 'train', 'test'])
    }
    # Create dataset
    images, labels_ami, labels_mse = generate_dataset(args, data)
    name = 'mnist_{}'.format('_'.join([str(n) for n in args.num_objects_all]))
    create_dataset(os.path.join(args.folder_outputs, name), images,
                   labels_ami, labels_mse)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--folder_in')
    parser.add_argument('--folder_out')
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    if not os.path.exists(args.folder_out):
        os.makedirs(args.folder_out)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in,
                                       map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'clevr')
    with tf.Session() as sess:
        data = sess.run(next_batch)
    data['image'] = np.stack([crop_and_resize(val) for val in data['image']])
    data['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data['mask']
    ])
    create_dataset(data, path_out, args.num_parallel)
    return
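# These excerpts define main() but never call it; each script presumably ends
# with a standard entry-point guard like the following.
if __name__ == '__main__':
    main()

# Illustrative invocation (the script name and folder names are assumptions):
#   python create_clevr.py --folder_in downloads --folder_out outputs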
parser.add_argument('--num_valid', type=int, default=10000)
parser.add_argument('--num_test', type=int, default=10000)
parser.add_argument('--image_height', type=int, default=48)
parser.add_argument('--image_width', type=int, default=48)
parser.add_argument('--seed', type=int, default=265076)
args = parser.parse_args()
if not os.path.exists(args.folder_outputs):
    os.mkdir(args.folder_outputs)
# Elements
# download=True added to match the sibling scripts, so the first run does not
# fail when MNIST has not been fetched yet.
mnist = {
    phase: torchvision.datasets.MNIST(args.folder_downloads, train=train,
                                      download=True)
    for phase, train in zip(['train', 'test'], [True, False])
}
elements = {
    key: [create_element(np.array(n[0])) for n in val]
    for key, val in mnist.items()
}
# Background element (channels-first): zero intensity, mask channel set to one.
back = np.zeros((2, args.image_height, args.image_width))
back[-1] = 1
# Validation reuses the MNIST training digits; test uses the test digits.
elements = {
    key: {'back': back, 'objects': elements[key_prev]}
    for key, key_prev in zip(['train', 'valid', 'test'],
                             ['train', 'train', 'test'])
}
# Objects
objects = generate_objects(args, elements)
create_dataset(os.path.join(args.folder_outputs, args.name), objects)
     [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0],
     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
shapes = [square, triangle, triangle[::-1, :].copy()]
image_size = 28
num_objects = args.num_objects
num_data = {'train': 50000, 'valid': 10000, 'test': 10000}
images = {
    key: np.empty((val, 1, image_size, image_size), dtype=np.float32)
    for key, val in num_data.items()
}
labels_ami = {
    key: np.empty((val, image_size, image_size), dtype=np.float32)
    for key, val in num_data.items()
}
labels_mse = {
    key: np.empty((val, num_objects, 1, image_size, image_size),
                  dtype=np.float32)
    for key, val in num_data.items()
}
np.random.seed(265076)
for key in ['train', 'valid', 'test']:
    for idx in range(num_data[key]):
        images[key][idx], labels_ami[key][idx], labels_mse[key][idx] = \
            generate_data(image_size, num_objects)
create_dataset('shapes_28x28_{}'.format(num_objects), images, labels_ami,
               labels_mse)
    phase: torchvision.datasets.MNIST(args.folder_downloads, train=train,
                                      download=True)
    for phase, train in zip(['train', 'test'], [True, False])
}
elements = {
    key: [create_element(np.array(n[0])) for n in val]
    for key, val in mnist.items()
}
# Channels-first layout: each item holds num_objects foreground layers plus
# one background layer.
objects = {
    'train': np.empty((args.num_train, args.num_objects + 1,
                       elements['train'][0].shape[0], args.image_height,
                       args.image_width), dtype=np.float32),
    'valid': np.empty((args.num_valid, args.num_objects + 1,
                       elements['train'][0].shape[0], args.image_height,
                       args.image_width), dtype=np.float32),
    'test': np.empty((args.num_test, args.num_objects + 1,
                      elements['train'][0].shape[0], args.image_height,
                      args.image_width), dtype=np.float32),
}
# Datasets (20 and 500 digit variants)
for num_variants in [20, 500]:
    np.random.seed(args.seed)
    for key in ['train', 'valid', 'test']:
        for idx in range(objects[key].shape[0]):
            objects[key][idx] = generate_objects(
                elements['train'][:num_variants], args.image_height,
                args.image_width, args.num_objects)
    create_dataset('{}_{}'.format(args.name, num_variants), objects)
# Datasets (all digit variants)
np.random.seed(args.seed)
for key in ['train', 'valid']:
    for idx in range(objects[key].shape[0]):
        objects[key][idx] = generate_objects(elements['train'],
                                             args.image_height,
                                             args.image_width,
                                             args.num_objects)
key = 'test'
for idx in range(objects[key].shape[0]):
    objects[key][idx] = generate_objects(elements['test'], args.image_height,
                                         args.image_width, args.num_objects)
create_dataset('{}_all'.format(args.name), objects)
mnist_targets = f['normalized_full/training/targets'][:]
mnist_digits_test = f['normalized_full/test/default'][0, :]
mnist_targets_test = f['normalized_full/test/targets'][:]
image_size = 48
num_objects = 2
num_data = {'train': 50000, 'valid': 10000, 'test': 10000}
images = {
    key: np.empty((val, 1, image_size, image_size), dtype=np.float32)
    for key, val in num_data.items()
}
labels_ami = {
    key: np.empty((val, image_size, image_size), dtype=np.float32)
    for key, val in num_data.items()
}
labels_mse = {
    key: np.empty((val, num_objects, 1, image_size, image_size),
                  dtype=np.float32)
    for key, val in num_data.items()
}
# Datasets (20 and 500 digit variants)
for num_variants in [20, 500]:
    np.random.seed(36520)
    for key in ['train', 'valid', 'test']:
        for idx in range(num_data[key]):
            digit_indices = np.random.randint(0, num_variants, num_objects)
            images[key][idx], labels_ami[key][idx], labels_mse[key][idx] = \
                generate_data(image_size, digit_indices)
    create_dataset('mnist_{}'.format(num_variants), images, labels_ami,
                   labels_mse)
# Datasets (all digit variants)
np.random.seed(36520)
for key in ['train', 'valid']:
    for idx in range(num_data[key]):
        digit_indices = np.random.randint(0, 60000, num_objects)
        images[key][idx], labels_ami[key][idx], labels_mse[key][idx] = \
            generate_data(image_size, digit_indices)
key = 'test'
for idx in range(num_data[key]):
    digit_indices = np.random.randint(0, 10000, num_objects)
    images[key][idx], labels_ami[key][idx], labels_mse[key][idx] = \
        generate_data(image_size, digit_indices, test=True)
create_dataset('mnist_all', images, labels_ami, labels_mse)
import argparse
import h5py
import os

from common import create_dataset

parser = argparse.ArgumentParser()
parser.add_argument('--folder_downloads')
parser.add_argument('--filename', default='shapes_28x28.h5')
args = parser.parse_args()
with h5py.File(os.path.join(args.folder_downloads, args.filename), 'r') as f:
    # key[:5] maps 'training' -> 'train' and 'validation' -> 'valid'.
    images_prev = {key[:5]: f[key]['features'][()]
                   for key in ['training', 'validation', 'test']}
    labels_ami_prev = {key[:5]: f[key]['groups'][()]
                       for key in ['training', 'validation', 'test']}
    labels_mse_prev = {key[:5]: f[key]['masks'][()]
                       for key in ['training', 'validation', 'test']}
# Rearrange axes to the layout expected by create_dataset.
images = {key: val[0, :, None, :, :, 0] for key, val in images_prev.items()}
labels_ami = {key: val[0, :, :, :, 0] for key, val in labels_ami_prev.items()}
labels_mse = {key: val.transpose(1, 4, 0, 2, 3)
              for key, val in labels_mse_prev.items()}
create_dataset('shapes_28x28_3', images, labels_ami, labels_mse)
description=("A script for interacting with DASIT")) parser.add_argument("-d", "--dataset", dest="dataset", required=True, type=str, help="The dataset name for the command") parser.add_argument("-g", "--group", dest="group", required=True, type=str, help="The group name for the command") args = parser.parse_args() args_dict = vars(args) return args_dict ## Main args = parse_arguments() ds = create_dataset(args['dataset'], args['group']) if ds: print(f"'{ds['group']['name']}' : '{ds['name']}' : {ds['status']}") sys.exit(0) else: sys.exit(1)
# print("labels_ami shape: ", labels_ami.shape) # print("labels_ami: \n", labels_ami[0, :, :]) # print("labels_mse shape", labels_mse.shape) # print("labels_mse_1 \n", labels_mse[0,0,:,:]) # print("labels_mse_2 \n", labels_mse[0,1,:,:]) # print("labels_mse_3 \n", labels_mse[0,2,:,:]) ''' images: (70000, 1, 20, 20) labels_ami: (70000, 20, 20) labels_mse: (70000, 3, 1, 20, 20) ''' sep1, sep2 = 50000, 60000 images = { 'train': images[:sep1], 'valid': images[sep1:sep2], 'test': images[sep2:] } labels_ami = { 'train': labels_ami[:sep1], 'valid': labels_ami[sep1:sep2], 'test': labels_ami[sep2:] } labels_mse = { 'train': labels_mse[:sep1], 'valid': labels_mse[sep1:sep2], 'test': labels_mse[sep2:] } create_dataset('shapes_20x20', images, labels_ami, labels_mse) f.close()
     [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
     [0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
     [0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0],
     [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0],
     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
elements = [square, triangle, triangle[::-1, :].copy()]
# Channels-first: duplicate each shape into (intensity, mask) channels.
elements = [n[None].repeat(2, axis=0) for n in elements]
objects = {
    'train': np.empty((args.num_train, args.num_objects + 1,
                       elements[0].shape[0], args.image_height,
                       args.image_width), dtype=np.float32),
    'valid': np.empty((args.num_valid, args.num_objects + 1,
                       elements[0].shape[0], args.image_height,
                       args.image_width), dtype=np.float32),
    'test': np.empty((args.num_test, args.num_objects + 1,
                      elements[0].shape[0], args.image_height,
                      args.image_width), dtype=np.float32),
}
# Datasets
np.random.seed(args.seed)
for key in ['train', 'valid', 'test']:
    for idx in range(objects[key].shape[0]):
        objects[key][idx] = generate_objects(elements, args.image_height,
                                             args.image_width,
                                             args.num_objects)
create_dataset(args.name, objects)