# Module-level imports used by the functions below. pkl_utils, collect_dataset_filenames and
# load_raw_dataset are project-local helpers defined elsewhere in the codebase.
import cPickle
import os
from cStringIO import StringIO
from gzip import GzipFile

import numpy as np
import requests
from skimage.transform import resize


def download_mnist(data_dir):
    """Download MNIST dataset and convert it to the same format as the Bandcamp dataset (useful as a sanity check)."""
    response = requests.get('http://deeplearning.net/data/mnist/mnist.pkl.gz')
    with GzipFile(fileobj=StringIO(response.content), mode='rb') as unzipped:
        raw_data = cPickle.load(unzipped)
    dataset = {name: (d[0], d[1].astype('int32'))
               for name, d in zip(['training', 'validation', 'testing'], raw_data)}
    # Map the digit names '0' to '9' to integer indices, mirroring the Bandcamp label mapping
    label_to_index = dict(zip(map(str, range(10)), range(10)))
    with open(os.path.join(data_dir, 'mnist.pkl.zip'), 'wb') as out:
        pkl_utils.dump((dataset, label_to_index), out)
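
# A minimal read-back sketch (not part of the original module), assuming pkl_utils.load is the
# counterpart of the pkl_utils.dump call above. It shows the shared dataset format: a dict mapping
# subset names to (instances, int32 labels) pairs, plus a label-name-to-index mapping.
def load_mnist(data_dir):
    with open(os.path.join(data_dir, 'mnist.pkl.zip'), 'rb') as dataset_file:
        dataset, label_to_index = pkl_utils.load(dataset_file)
    # dataset['training'], dataset['validation'] and dataset['testing'] are (instances, labels) pairs
    return dataset, label_to_index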
def create_datasets(image_dir, out_dir, skip_full_pickle=False):
    """Create the dataset pickles and JSONs.

    This is a wrapper around collect_dataset_filenames and load_raw_dataset that creates both the local and full
    datasets. On systems with little memory, pass in skip_full_pickle=True to skip creating the pickle for the
    full dataset.
    """
    for dataset_name, json_path in collect_dataset_filenames(image_dir, out_dir).iteritems():
        if skip_full_pickle and dataset_name == 'full':
            continue
        # Create both a greyscale representation (no suffix) and an RGB representation (.rgb suffix)
        for suffix, as_grey, flatten in [('', True, True), ('.rgb', False, False)]:
            with open(os.path.join(out_dir, '%s%s.pkl.zip' % (dataset_name, suffix)), 'wb') as out:
                pkl_utils.dump(load_raw_dataset(json_path, as_grey=as_grey, flatten=flatten), out)
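
# Note: for each dataset name (e.g. 'full', plus the local dataset mentioned in the docstring), the loop
# above writes two pickles under out_dir: <name>.pkl.zip (greyscale, flattened) and <name>.rgb.pkl.zip (RGB).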
def resize_rgb_dataset(in_filename, out_filename, x_size=0, y_size=0):
    """Resize an RGB dataset so that every image is of the given shape."""
    with open(in_filename, 'rb') as dataset_file:
        in_dataset, label_to_index = pkl_utils.load(dataset_file)
    shape = (x_size, y_size)
    out_dataset = {}
    for subset, (instances, labels) in in_dataset.iteritems():
        resized_instances = np.zeros(shape=(len(instances), 3) + shape, dtype=instances.dtype)
        for i, instance in enumerate(instances):
            # Transpose the image to (height, width, channels), resize, transpose back, and multiply by 255
            # to get uint8
            resized_instances[i] = resize(instance.transpose(1, 2, 0), shape).transpose(2, 0, 1) * 255
        out_dataset[subset] = (resized_instances, labels)
    with open(out_filename, 'wb') as out:
        pkl_utils.dump((out_dataset, label_to_index), out)
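
# Usage sketch (file names and target size are illustrative assumptions, not taken from the project):
#     resize_rgb_dataset(os.path.join(out_dir, 'full.rgb.pkl.zip'),
#                        os.path.join(out_dir, 'full.rgb.64.pkl.zip'),
#                        x_size=64, y_size=64)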
def _save_model_snapshot(output_layer, snapshot_prefix, next_epoch):
    """Pickle the model's output layer together with the next epoch to run, so training can be resumed later."""
    snapshot_path = '%s.snapshot-%s.pkl.zip' % (snapshot_prefix, next_epoch)
    print('Saving snapshot to %s' % snapshot_path)
    with open(snapshot_path, 'wb') as out:
        pkl_utils.dump((next_epoch, output_layer), out)
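
# A matching load sketch (not part of the original module), assuming pkl_utils.load mirrors pkl_utils.dump:
# it recovers the epoch to resume from and the output layer written by _save_model_snapshot.
def _load_model_snapshot(snapshot_path):
    with open(snapshot_path, 'rb') as snapshot_file:
        next_epoch, output_layer = pkl_utils.load(snapshot_file)
    return next_epoch, output_layer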