def extract_images(self, overwrite=False):
    """
    Write the CIFAR-10 train and val images as PNG files under ``self.out_dir``.

    Images land in ``<out_dir>/<set>/<label>/<index>.png``. Each image is
    reshaped to (3, 32, 32), padded with ``self.pad_width`` using
    ``np.pad(..., mode='mean')`` and stored in height x width x channel order.
    After the training set is written, ``self.pixel_mean`` is set to the
    per-channel mean of the training data in reversed channel order.

    Arguments:
        overwrite (bool): accepted for interface compatibility; this
                          implementation does not read it.
    """
    from neon.data import CIFAR10
    from PIL import Image

    loader = CIFAR10(path=self.out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    dataset = {'train': train_set, 'val': val_set}

    for setn in ('train', 'val'):
        data, labels = dataset[setn]
        img_dir = os.path.join(self.out_dir, setn)

        # One sub-directory per distinct label value.
        for ulabel in np.unique(labels):
            subdir = os.path.join(img_dir, str(ulabel))
            if not os.path.exists(subdir):
                os.makedirs(subdir)

        for idx, flat in enumerate(data):
            chw = np.pad(flat.reshape((3, 32, 32)), self.pad_width, mode='mean')
            # Channel axis goes last before handing the array to PIL.
            hwc = np.uint8(np.transpose(chw, axes=[1, 2, 0]).copy())
            target = os.path.join(img_dir, str(labels[idx][0]), str(idx) + '.png')
            Image.fromarray(hwc).save(target, format='PNG')

        if setn == 'train':
            channel_means = data.mean(axis=0).reshape(3, -1).mean(axis=1)
            # Reversed because we will see this in BGR order b/c of opencv.
            self.pixel_mean = list(channel_means)[::-1]
def extract_images(out_dir, padded_size):
    """
    Save CIFAR-10 dataset as PNG files.

    Images land in ``<out_dir>/<set>/<label>/<index>.png`` for the 'train'
    and 'val' sets. When ``padded_size`` exceeds 32, each image gets a border
    of ``(padded_size - 32) // 2`` pixels on every spatial side via
    ``np.pad(..., mode='mean')``; otherwise no padding is added.

    Arguments:
        out_dir (str): directory handed to the CIFAR10 loader and used as the
                       root of the PNG tree.
        padded_size (int): requested side length of the written images.
    """
    import numpy as np
    from neon.data import CIFAR10
    from PIL import Image

    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    dataset = {'train': train_set, 'val': val_set}

    if padded_size > 32:
        border = (padded_size - 32) // 2
    else:
        border = 0
    # No padding on the channel axis, symmetric padding on both spatial axes.
    pad_width = ((0, 0),) + ((border, border),) * 2

    for setn in ('train', 'val'):
        data, labels = dataset[setn]
        img_dir = os.path.join(out_dir, setn)

        # One sub-directory per distinct label value.
        for ulabel in np.unique(labels):
            subdir = os.path.join(img_dir, str(ulabel))
            if not os.path.exists(subdir):
                os.makedirs(subdir)

        for idx, flat in enumerate(data):
            chw = np.pad(flat.reshape((3, 32, 32)), pad_width, mode='mean')
            # Channel axis goes last before handing the array to PIL.
            hwc = np.uint8(np.transpose(chw, axes=[1, 2, 0]).copy())
            target = os.path.join(img_dir, str(labels[idx][0]), str(idx) + '.png')
            Image.fromarray(hwc).save(target, format='PNG')
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    """
    Save CIFAR-10 dataset as PNG files and write comma-separated manifests.

    A ``train.cfg`` file is rewritten next to this source file on every call,
    before the skip check. If both manifests already exist and ``overwrite``
    is false, nothing else is written.

    Arguments:
        out_dir (str): root directory for the dataset, images, label files
                       and manifests.
        padded_size (int): requested image side; values above 32 add a
                           mean-filled border of ``(padded_size - 32) // 2``
                           pixels on each spatial side.
        overwrite (bool): redo the ingest even when the manifests exist.

    Returns:
        list: manifest paths in ('train', 'val') order.
    """
    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    dataset = {'train': train_set, 'val': val_set}

    if padded_size > 32:
        border = (padded_size - 32) // 2
    else:
        border = 0
    # No padding on the channel axis, symmetric padding on both spatial axes.
    pad_width = ((0, 0),) + ((border, border),) * 2

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, name + '-index.csv') for name in set_names]

    here = os.path.dirname(os.path.realpath(__file__))
    cfg_file = os.path.join(here, 'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join(
        ':'.join(pair) for pair in zip(set_names, manifest_files))

    cfg_text = ''.join([
        'manifest = [{}]\n'.format(manifest_list_cfg),
        'manifest_root = {}\n'.format(out_dir),
        'log = {}\n'.format(log_file),
        'epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n',
        'backend = gpu\nbatch_size = 64\n',
    ])
    with open(cfg_file, 'w') as f:
        f.write(cfg_text)

    if not overwrite and all(os.path.exists(path) for path in manifest_files):
        return manifest_files

    # One label file per class, plus one image directory per (set, class).
    # NOTE(review): ensure_dirs_exist is defined elsewhere; its return value
    # is used as the path here - confirm it returns the path it was given.
    lbl_paths = {}
    img_paths = {name: {} for name in set_names}
    for lbl in range(10):
        label_file = os.path.join(out_dir, 'labels', str(lbl) + '.txt')
        lbl_paths[lbl] = ensure_dirs_exist(label_file)
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for name in set_names:
            class_dir = os.path.join(out_dir, name, str(lbl) + '/')
            img_paths[name][lbl] = ensure_dirs_exist(class_dir)

    # Image files and the manifest rows that point at them.
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):
            label = lbl[0]
            img_path = os.path.join(img_paths[setn][label], str(idx) + '.png')
            chw = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            hwc = np.uint8(np.transpose(chw, axes=[1, 2, 0]).copy())
            Image.fromarray(hwc).save(img_path, format='PNG')
            # Manifest entries are stored relative to out_dir.
            records.append((os.path.relpath(img_path, out_dir),
                            os.path.relpath(lbl_paths[label], out_dir)))
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
def ingest_cifar10(out_dir, padded_size, overwrite=False):
    """
    Save CIFAR-10 dataset as PNG files and write tab-separated manifests.

    A ``train.cfg`` file is rewritten next to this source file on every call,
    before the skip check. If both manifests already exist and ``overwrite``
    is false, the ingest itself is skipped.

    Images are written to ``<out_dir>/<set>/<label>_<index>.png``. Each
    manifest starts with the header row ``@FILE<TAB>STRING``, followed by one
    ``<relative image path><TAB><label>`` row per image.

    Arguments:
        out_dir (str): root directory for the dataset, images and manifests.
        padded_size (int): requested image side; values above 32 add a border
                           of ``(padded_size - 32) // 2`` pixels on each
                           spatial side using ``np.pad(..., mode='mean')``.
                           An odd difference therefore yields images one
                           pixel smaller than requested.
        overwrite (bool): redo the ingest even when the manifests exist.

    Returns:
        list: manifest paths in ('train', 'val') order, on both the skip path
        and the full-ingest path.
    """
    dataset = dict()
    cifar10 = CIFAR10(path=out_dir, normalize=False)
    dataset['train'], dataset['val'], _ = cifar10.load_data()

    pad_size = (padded_size - 32) // 2 if padded_size > 32 else 0
    # No padding on the channel axis, symmetric padding on both spatial axes.
    pad_width = ((0, 0), (pad_size, pad_size), (pad_size, pad_size))

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, setn + '-index.csv') for setn in set_names]

    cfg_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'train.cfg')
    log_file = os.path.join(out_dir, 'train.log')
    manifest_list_cfg = ', '.join(
        [k + ':' + v for k, v in zip(set_names, manifest_files)])

    # The config is refreshed unconditionally so it always matches out_dir.
    with open(cfg_file, 'w') as f:
        f.write('manifest = [{}]\n'.format(manifest_list_cfg))
        f.write('manifest_root = {}\n'.format(out_dir))
        f.write('log = {}\n'.format(log_file))
        f.write('epochs = 165\nrng_seed = 0\nverbose = True\neval_freq = 1\n')
        f.write('backend = gpu\nbatch_size = 64\n')

    if not overwrite and all(os.path.exists(manifest) for manifest in manifest_files):
        print("Found existing manifest files, skipping ingest, "
              "use --overwrite to rerun ingest.")
        return manifest_files

    # Now write out image files and manifests
    for setn, manifest in zip(set_names, manifest_files):
        img_path = os.path.join(out_dir, setn)
        if not os.path.isdir(img_path):
            os.makedirs(img_path)

        # Header row naming the two manifest columns.
        records = [('@FILE', 'STRING')]
        for idx, (img, lbl) in enumerate(tqdm(zip(*dataset[setn]))):
            fname = os.path.join(img_path, '{}_{:05d}.png'.format(lbl[0], idx))
            im = np.pad(img.reshape((3, 32, 32)), pad_width, mode='mean')
            # Channel axis goes last before handing the array to PIL.
            im = Image.fromarray(np.uint8(np.transpose(im, axes=[1, 2, 0]).copy()))
            im.save(fname, format='PNG')
            # Image paths are stored relative to out_dir.
            records.append((os.path.relpath(fname, out_dir), lbl[0]))
        np.savetxt(manifest, records, fmt='%s\t%s')

    print("Manifest files written to:\n" + "\n".join(manifest_files))
    # Return the same value as the skip path so callers get the manifest
    # list whether or not the ingest actually ran.
    return manifest_files
def ingest_cifar10(out_dir, overwrite=False):
    """
    Save CIFAR-10 dataset as PNG files and write shuffled, comma-separated manifests.

    If both manifests already exist and ``overwrite`` is false, nothing is
    written. Otherwise one label file per class and one image directory per
    (set, class) are created, every image is saved unpadded as PNG, and each
    manifest's rows are shuffled after seeding NumPy's global RNG with 0.

    Arguments:
        out_dir (str): root directory for the dataset, images, label files
                       and manifests.
        overwrite (bool): redo the ingest even when the manifests exist.

    Returns:
        list: manifest paths in ('train', 'val') order.
    """
    loader = CIFAR10(path=out_dir, normalize=False)
    train_set, val_set, _ = loader.load_data()
    dataset = {'train': train_set, 'val': val_set}

    set_names = ('train', 'val')
    manifest_files = [os.path.join(out_dir, name + '-index.csv') for name in set_names]

    if not overwrite and all(os.path.exists(path) for path in manifest_files):
        return manifest_files

    # One label file per class, plus one image directory per (set, class).
    # NOTE(review): ensure_dirs_exist is defined elsewhere; its return value
    # is used as the path here - confirm it returns the path it was given.
    lbl_paths = {}
    img_paths = {name: {} for name in set_names}
    for lbl in range(10):
        label_file = os.path.join(out_dir, 'labels', str(lbl) + '.txt')
        lbl_paths[lbl] = ensure_dirs_exist(label_file)
        np.savetxt(lbl_paths[lbl], [lbl], fmt='%d')
        for name in set_names:
            class_dir = os.path.join(out_dir, name, str(lbl) + '/')
            img_paths[name][lbl] = ensure_dirs_exist(class_dir)

    # Fixed seed so the shuffled manifest order is reproducible.
    np.random.seed(0)

    # Image files and the manifest rows that point at them.
    for setn, manifest in zip(set_names, manifest_files):
        records = []
        for idx, (img, lbl) in tqdm(enumerate(zip(*dataset[setn]))):
            label = lbl[0]
            img_path = os.path.join(img_paths[setn][label], str(idx) + '.png')
            chw = img.reshape((3, 32, 32))
            # Channel axis goes last before handing the array to PIL.
            hwc = np.uint8(np.transpose(chw, axes=[1, 2, 0]).copy())
            Image.fromarray(hwc).save(img_path, format='PNG')
            records.append((img_path, lbl_paths[label]))
        np.random.shuffle(records)
        np.savetxt(manifest, records, fmt='%s,%s')

    return manifest_files
from neon import logger as neon_logger
from neon.data import CIFAR10
from neon.initializers import Uniform
from neon.layers import GeneralizedCost, Affine
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Misclassification, CrossEntropyBinary, Logistic, Rectlin
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments
# (the module docstring is handed to the parser)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# Load CIFAR-10 from args.data_dir with normalize=True; contrast
# normalization and whitening are explicitly switched off.
dataset = CIFAR10(path=args.data_dir,
                  normalize=True,
                  contrast_normalize=False,
                  whiten=False)
# Training and validation iterators taken from the dataset object; the
# validation iterator is bound to the name `test`.
train = dataset.train_iter
test = dataset.valid_iter

# Uniform initializer configured with low=-0.1, high=0.1; passed to both layers.
init_uni = Uniform(low=-0.1, high=0.1)
# Gradient descent with momentum: learning_rate=0.01, momentum_coef=0.9.
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers
# Two Affine layers: 200 outputs with Rectlin activation, then 10 outputs
# with a Logistic activation created with shortcut=True.
layers = [
    Affine(nout=200, init=init_uni, activation=Rectlin()),
    Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))
]

# Cost wraps CrossEntropyBinary in a GeneralizedCost layer.
cost = GeneralizedCost(costfunc=CrossEntropyBinary())