Example no. 1
0
File: images.py Project: cajal/cmt
def main(argv):
	# load image and turn into grayscale
	img = rgb2gray(imread('media/newyork.png'))

	# generate data
	inputs, outputs = generate_data_from_image(
		img, input_mask, output_mask, 220000)

	# split data into training, test, and validation sets
	inputs  = split(inputs,  [100000, 200000], 1)
	outputs = split(outputs, [100000, 200000], 1)

	data_train = inputs[0], outputs[0]
	data_test  = inputs[1], outputs[1]
	data_valid = inputs[2], outputs[2]

	# compute normalizing transformation
	pre = WhiteningPreconditioner(*data_train)

	# intialize model
	model = MCGSM(
		dim_in=data_train[0].shape[0],
		dim_out=data_train[1].shape[0],
		num_components=8,
		num_scales=4,
		num_features=32)

	# fit parameters
	model.initialize(*pre(*data_train))
	model.train(*chain(pre(*data_train), pre(*data_valid)),
		parameters={
			'verbosity': 1,
			'max_iter': 1000,
			'threshold': 1e-7,
			'val_iter': 5,
			'val_look_ahead': 10,
			'num_grad': 20,
		})

	# evaluate model
	print 'Average log-likelihood: {0:.4f} [bit/px]'.format(
			-model.evaluate(data_test[0], data_test[1], pre))

	# synthesize a new image
	img_sample = sample_image(img, model, input_mask, output_mask, pre)

	imwrite('newyork_sample.png', img_sample,
		cmap='gray',
		vmin=min(img),
		vmax=max(img))

	# save model
	with open('image_model.pck', 'wb') as handle:
		dump({
			'model': model,
			'input_mask': input_mask,
			'output_mask': output_mask}, handle, 1)

	return 0
Example no. 2
0
def main(argv):
    """Train grayscale and color models on an image and synthesize a sample."""
    # load image and convert it from RGB into YCbCr color space
    image = rgb2ycc(imread('media/newyork.png'))

    # causal neighborhood masks for the grayscale and the color model
    in_mask_gray, out_mask_gray = generate_masks(7, 1)
    in_mask_color, out_mask_color = generate_masks([5, 7, 7], 1, [1, 0, 0])

    # fit one model to the luminance channel, one to all channels
    model_gray, pre_gray = train_model(image[:, :, 0], in_mask_gray,
                                       out_mask_gray)
    model_color, pre_color = train_model(image, in_mask_color, out_mask_color)

    # start the sample off as a copy of the input image
    sample = image.copy()

    # replace luminance values by sampled ones
    sample[:, :, 0] = sample_image(sample[:, :, 0], model_gray, in_mask_gray,
                                   out_mask_gray, pre_gray)

    # then sample the color channels conditioned on the luminance
    sample = sample_image(sample, model_color, in_mask_color, out_mask_color,
                          pre_color)

    # map back to RGB, enforcing valid pixel values
    sample = ycc2rgb(sample)

    imwrite('newyork_sample.png', sample, vmin=0, vmax=255)

    # persist both models together with their masks
    with open('image_model.pck', 'wb') as handle:
        dump(
            {
                'model0': model_gray,
                'model1': model_color,
                'input_mask0': in_mask_gray,
                'input_mask1': in_mask_color,
                'output_mask0': out_mask_gray,
                'output_mask1': out_mask_color
            }, handle, 1)

    return 0
Example no. 3
0
def main(argv):
	"""Synthesize an image using a grayscale model plus a color model."""
	# read the image and move it into YCbCr color space
	img = rgb2ycc(imread('media/newyork.png'))

	# causal neighborhood masks: one pair for intensities, one for color
	gray_masks = generate_masks(7, 1)
	color_masks = generate_masks([5, 7, 7], 1, [1, 0, 0])

	input_mask0, output_mask0 = gray_masks
	input_mask1, output_mask1 = color_masks

	# fit the intensity model on the Y channel, the color model on all channels
	model0, pre0 = train_model(img[:, :, 0], input_mask0, output_mask0)
	model1, pre1 = train_model(img, input_mask1, output_mask1)

	# the sample starts out as a copy of the input image
	img_sample = img.copy()

	# replace intensity values by sampled ones
	img_sample[:, :, 0] = sample_image(
		img_sample[:, :, 0], model0, input_mask0, output_mask0, pre0)

	# then sample the color channels conditioned on the intensities
	img_sample = sample_image(
		img_sample, model1, input_mask1, output_mask1, pre1)

	# map back to RGB, enforcing valid pixel values
	img_sample = ycc2rgb(img_sample)

	imwrite('newyork_sample.png', img_sample, vmin=0, vmax=255)

	# persist both models together with their masks
	with open('image_model.pck', 'wb') as handle:
		dump({
			'model0': model0,
			'model1': model1,
			'input_mask0': input_mask0,
			'input_mask1': input_mask1,
			'output_mask0': output_mask0,
			'output_mask1': output_mask1}, handle, 1)

	return 0
Example no. 4
0
File: images.py Project: ominux/cmt
def main(argv):
    # load image and turn into grayscale
    img = rgb2gray(imread('media/newyork.png'))

    # generate data
    inputs, outputs = generate_data_from_image(img, input_mask, output_mask,
                                               220000)

    # split data into training, test, and validation sets
    inputs = split(inputs, [100000, 200000], 1)
    outputs = split(outputs, [100000, 200000], 1)

    data_train = inputs[0], outputs[0]
    data_test = inputs[1], outputs[1]
    data_valid = inputs[2], outputs[2]

    # compute normalizing transformation
    pre = WhiteningPreconditioner(*data_train)

    # intialize model
    model = MCGSM(dim_in=data_train[0].shape[0],
                  dim_out=data_train[1].shape[0],
                  num_components=8,
                  num_scales=4,
                  num_features=32)

    # fit parameters
    model.initialize(*pre(*data_train))
    model.train(*chain(pre(*data_train), pre(*data_valid)),
                parameters={
                    'verbosity': 1,
                    'max_iter': 1000,
                    'threshold': 1e-7,
                    'val_iter': 5,
                    'val_look_ahead': 10,
                    'num_grad': 20,
                })

    # evaluate model
    print 'Average log-likelihood: {0:.4f} [bit/px]'.format(
        -model.evaluate(data_test[0], data_test[1], pre))

    # synthesize a new image
    img_sample = sample_image(img, model, input_mask, output_mask, pre)

    imwrite('newyork_sample.png',
            img_sample,
            cmap='gray',
            vmin=min(img),
            vmax=max(img))

    # save model
    with open('image_model.pck', 'wb') as handle:
        dump(
            {
                'model': model,
                'input_mask': input_mask,
                'output_mask': output_mask
            }, handle, 1)

    return 0
Example no. 5
0
def main(argv):
    """Sample an image from a trained RIDE model and write it to disk.

    The image is initialized with white noise -- either plain Gaussian noise
    or noise whose marginal distribution is matched to a data set given via
    ``--data`` -- then sampled from the model and cropped/clipped for output.
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('model', type=str)
    parser.add_argument('--num_rows', '-r', type=int, default=256)
    parser.add_argument('--num_cols', '-c', type=int, default=256)
    parser.add_argument('--data', '-d', type=str, default=None)
    parser.add_argument('--log', '-L', type=int, default=0)
    parser.add_argument('--output', '-o', type=str, default='sample.png')
    parser.add_argument('--margin', '-M', type=int, default=8)

    args = parser.parse_args(argv[1:])

    # load the trained model from the experiment file
    model = Experiment(args.model)['model']

    if isinstance(model, PatchRIDE):
        # patch-based model: sample directly, no initialization image needed
        img = model.sample()[0]
        imwrite(args.output, imformat(img, vmin=0, vmax=255, symmetric=False))

    else:
        if args.data is None:
            # initialize image with white noise
            img_init = randn(1, args.num_rows + args.margin * 2, args.num_cols
                             + args.margin * 2, sum(model.num_channels)) / 10.
            img = model.sample(img_init)

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)

            if args.margin > 0:
                # crop the sampling margin on all four sides
                img = img[:, args.margin:-args.margin,
                          args.margin:-args.margin]

            if img.shape[-1] == 3:
                # RGB: clip to [0, 255] and store as an 8-bit image
                img[img > 255.] = 255.
                img[img < 0.] = 0.
                imwrite(args.output, asarray(img[0, :, :, :], dtype='uint8'))
            else:
                imwrite(args.output, imformat(img[0, :, :, 0], perc=99))

        else:
            # use a data set to match the marginal distribution of the noise
            if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
                data = imread(args.data)[None]
                vmin, vmax = 0, 255
            else:
                data = loadmat(args.data)['data']
                vmin = percentile(data, 0.02)
                vmax = percentile(data, 98.)

            if data.ndim < 4:
                # add a trailing channel dimension
                data = data[:, :, :, None]

            if isinstance(model, MultiscaleRIDE):
                num_channels = 1
            elif isinstance(model, ColorRIDE):
                num_channels = 3
            else:
                num_channels = model.num_channels

            # NOTE(review): rows get a single margin while columns get two;
            # this matches the asymmetric crop further down -- confirm intentional
            num_pixels = (args.num_rows + args.margin) * (args.num_cols +
                                                          args.margin * 2)

            # initialize image with white noise (but correct marginal distribution)
            img_init = []
            for c in range(num_channels):
                indices = randint(data.size // num_channels, size=num_pixels)
                img_init.append(
                    asarray(data[:, :, :, c].ravel()[indices],
                            dtype=float).reshape(
                                1, args.num_rows + args.margin,
                                args.num_cols + args.margin * 2, 1))
            img_init = concatenate(img_init, 3)

            # clamp the initialization to the data range
            img_init[img_init < vmin] = vmin
            img_init[img_init > vmax] = vmax

            if isinstance(model, MultiscaleRIDE) or isinstance(
                    model, ColorRIDE):
                # these models sample in a transformed space
                data = model._transform(data)
                idx = randint(data.shape[0])
                img = model.sample(
                    img_init,
                    #				min_values=data[idx].min(1).min(0),
                    #				max_values=data[idx].max(1).max(0))
                    min_values=data.min(2).min(1).min(0),
                    max_values=data.max(2).max(1).max(0))
            else:
                #			img_init[:] = img_init.mean()
                img = model.sample(img_init,
                                   min_values=percentile(data, .1),
                                   max_values=percentile(data, 99.8))
    #				min_values=percentile(data, 1.),
    #				max_values=percentile(data, 96.))

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)
                vmin = power(exp(vmin), .45)
                vmax = power(exp(vmax), .45)

            # best effort: additionally store the raw sample as .npz
            try:
                savez(args.output.split('.')[0] + '.npz', sample=img)
            except:
                pass

            if args.margin > 0:
                # asymmetric crop: top margin only for rows, both sides for columns
                img = img[:, args.margin:, args.margin:-args.margin]

            if num_channels == 1:
                imwrite(
                    args.output,
                    imformat(img[0, :, :, 0],
                             vmin=vmin,
                             vmax=vmax,
                             symmetric=False))
            else:
                imwrite(
                    args.output,
                    imformat(img[0], vmin=vmin, vmax=vmax, symmetric=False))

    return 0
Example no. 6
0
def main(argv):
	"""Train a recurrent image model (RIDE variant) on image data.

	Most hyperparameter flags accept several values (``nargs='+'``); the k-th
	training iteration uses the k-th value, wrapping around cyclically -- see
	the ``k % len(...)`` indexing in the loop below. Progress is stored via
	an Experiment object after every iteration.
	"""
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--patch_size',      '-p', type=int,   default=[8, 10, 12, 14, 16, 18, 20, 22], nargs='+')
	parser.add_argument('--row_multiplier',  '-R', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--col_multiplier',  '-C', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--num_patches',     '-P', type=int,   default=None,
		help='If given, subsample training data.')
	parser.add_argument('--num_valid',       '-V', type=int,   default=0,
		help='Number of training images used for validation error based early stopping.')
	parser.add_argument('--finetune',        '-F', type=int,   default=[1], nargs='+',
		help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
	parser.add_argument('--learning_rate',   '-l', type=float, default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005], nargs='+')
	parser.add_argument('--momentum',        '-m', type=float, default=[.9], nargs='+')
	parser.add_argument('--batch_size',      '-B', type=int,   default=[50], nargs='+')
	parser.add_argument('--nb_size',         '-b', type=int,   default=5,
		help='Size of the causal neighborhood of pixels.')
	parser.add_argument('--num_hiddens',     '-n', type=int,   default=64)
	parser.add_argument('--num_components',  '-c', type=int,   default=32)
	parser.add_argument('--num_scales',      '-s', type=int,   default=4)
	parser.add_argument('--num_features',    '-f', type=int,   default=32)
	parser.add_argument('--num_epochs',      '-e', type=int,   default=[1], nargs='+')
	parser.add_argument('--precondition',    '-Q', type=int,   default=1)
	parser.add_argument('--method',          '-M', type=str,   default=['SGD'], nargs='+')
	parser.add_argument('--data',            '-d', type=str,   default='data/deadleaves_train.mat')
	parser.add_argument('--noise',           '-N', type=float, default=None,
		help='Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).')
	parser.add_argument('--model',           '-I', type=str,   default='',
		help='Start with this model as initialization. Other flags will be ignored.')
	parser.add_argument('--add_layer',       '-a', type=int,   default=[0], nargs='+')
	parser.add_argument('--train_top_layer', '-T', type=int,   default=[0], nargs='+')
	parser.add_argument('--train_means',     '-S', type=int,   default=[0], nargs='+')
	parser.add_argument('--mode',            '-q', type=str,   default='CPU', choices=['CPU', 'GPU'])
	parser.add_argument('--device',          '-D', type=int,   default=0)
	parser.add_argument('--augment',         '-A', type=int,   default=1,
		help='Increase training set size by transforming data.')
	parser.add_argument('--overlap',         '-O', type=int,   default=[1], nargs='+')
	parser.add_argument('--output',          '-o', type=str,   default='results/deadleaves/')
	parser.add_argument('--patch_model',           type=int,   default=0,
		help='Train a patch-based model instead of a stochastic process.')
	parser.add_argument('--extended',        '-X', type=int,   default=0,
		help='Use extended version of spatial LSTM.')
	parser.add_argument('--multiscale',      '-Y', type=int,   default=0,
		help='Apply recurrent image model to multiscale representation of images.')
	parser.add_argument('--color',           '-Z', type=int,   default=0,
		help='Use separate models to model color and grayscale values.')

	args = parser.parse_args(argv[1:])

	# container used to save models, losses, and arguments to disk
	experiment = Experiment()

	if args.mode.upper() == 'GPU':
		caffe.set_mode_gpu()
		caffe.set_device(args.device)
	else:
		caffe.set_mode_cpu()

	# load data
	if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
		data = imread(args.data)[None]
		# dequantize integer pixel values with uniform noise
		data += rand(*data.shape)
	else:
		data = loadmat(args.data)['data']

	# augment data by horizontal and, optionally, vertical mirroring
	if args.augment > 0:
		data = vstack([data, data[:, :, ::-1]])
	if args.augment > 1:
		data = vstack([data, data[:, ::-1, :]])

	if args.noise is not None:
		# add noise as a means for regularization
		# NOTE(review): this DIVIDES the data s.d. by args.noise; the flag's
		# help text ('as fraction of data standard deviation') suggests a
		# multiplication -- confirm which is intended
		data += randn(*data.shape) * (std(data, ddof=1) / args.noise)

	if args.num_valid > 0:
		if args.num_valid >= data.shape[0]:
			print 'Cannot use {0} for validation, there are only {1} training images.'.format(
					args.num_valid, data.shape[0])
			return 1

		# select subset for validation
		idx = random_select(args.num_valid, data.shape[0])
		data_valid = data[idx]
		data = asarray([image for i, image in enumerate(data) if i not in idx])

		print '{0} training images'.format(data.shape[0])
		print '{0} validation images'.format(data_valid.shape[0])

		# cut validation images into non-overlapping patches
		patches_valid = []
		# NOTE(review): patch size is derived from the training data's shape
		# but applied to data_valid -- assumes images have equal sizes
		patch_size = min([64, data.shape[1], data.shape[2]])
		for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
			for j in range(0, data_valid.shape[2] - patch_size + 1, patch_size):
				patches_valid.append(data_valid[:, i:i + patch_size, j:j + patch_size])
		patches_valid = vstack(patches_valid)

	if args.model:
		# load pretrained model
		results = Experiment(args.model)
		model = results['model']
		loss = [results['loss']]

		# optionally wrap the pretrained model into a patch-based model
		if args.patch_model and not isinstance(model, PatchRIDE):
			model = PatchRIDE(
				model=model,
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0])

	else:
		# create recurrent image model
		if args.patch_model:
			model = PatchRIDE(
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0],
				num_channels=data.shape[-1] if data.ndim > 3 else 1,
				num_hiddens=args.num_hiddens,
				nb_size=args.nb_size,
				num_components=args.num_components,
				num_scales=args.num_scales,
				num_features=args.num_features,
				model_class=ColorRIDE if args.color else RIDE)

			if args.extended:
				print 'Extended patch model not supported.'
				return 0

			if args.multiscale:
				print 'Multiscale patch model not supported.'
				return 0

		else:
			if args.multiscale:
				if data.ndim > 3 and data.shape[-1] > 1:
					print 'Multiscale color model not supported.'
					return 0

				model = MultiscaleRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

			elif args.color:
				if data.ndim < 4 or data.shape[-1] != 3:
					print 'These images don\'t look like RGB images.'
					return 0
				model = ColorRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

			else:
				model = RIDE(
					num_channels=data.shape[-1] if data.ndim > 3 else 1,
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

		loss = []

	# compute initial performance
	loss_valid = []

	if args.num_valid > 0:
		print 'Computing validation loss...'
		loss_valid.append(model.evaluate(patches_valid))
		# keep a copy so we can roll back if validation loss worsens
		model_copy = deepcopy(model)

	# one training iteration per requested patch size; per-iteration
	# hyperparameters are picked via k modulo the flag-list lengths
	for k, patch_size in enumerate(args.patch_size):
		if args.multiscale:
			patch_size *= 2

		if k < len(args.add_layer):
			for _ in range(args.add_layer[k]):
				# add spatial LSTM to the network
				model.add_layer()

		# extract patches of given patch size
		patches = []

		row_size = patch_size * args.row_multiplier[k % len(args.row_multiplier)]
		col_size = patch_size * args.col_multiplier[k % len(args.col_multiplier)]

		if isinstance(model, PatchRIDE):
			model.num_rows = row_size
			model.num_cols = col_size

		# Python 2 integer division determines the patch stride here
		for i in range(0, data.shape[1] - row_size + 1, row_size / args.overlap[k % len(args.overlap)]):
			for j in range(0, data.shape[2] - col_size + 1, col_size / args.overlap[k % len(args.overlap)]):
				patches.append(data[:, i:i + row_size, j:j + col_size])
		patches = vstack(patches)

		# randomize order of patches
		if args.num_patches is not None and args.num_patches < len(patches):
			patches = patches[random_select(args.num_patches, len(patches))]
		else:
			patches = patches[permutation(len(patches))]

		# determine batch size
		if args.method[k % len(args.method)].upper() == 'SFO':
			num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
			batch_size = patches.shape[0] // num_batches
		else:
			batch_size = args.batch_size[k % len(args.batch_size)]

		if batch_size < 1:
			raise RuntimeError('Too little data.')

		print 'Patch size: {0}x{1}'.format(row_size, col_size)
		print 'Number of patches: {0}'.format(patches.shape[0])
		print 'Batch size: {0}'.format(batch_size)

		# train recurrent image model
		print 'Training...'
		loss.append(
			model.train(patches,
				batch_size=batch_size,
				method=args.method[k % len(args.method)],
				num_epochs=args.num_epochs[k % len(args.num_epochs)],
				learning_rate=args.learning_rate[k % len(args.learning_rate)],
				momentum=args.momentum[k % len(args.momentum)],
				precondition=args.precondition > 0,
				train_top_layer=args.train_top_layer[k % len(args.train_top_layer)] > 0,
				train_means=args.train_means[k % len(args.train_means)] > 0))

		if args.finetune[k % len(args.finetune)]:
			print 'Finetuning...'
			model.finetune(patches, num_samples_train=1000000, max_iter=500)

		if args.num_valid > 0:
			print 'Computing validation loss...'
			loss_valid.append(model.evaluate(patches_valid))

			# early stopping: revert to the previous model and abort
			if loss_valid[-1] > loss_valid[-2]:
				print 'Performance got worse. Stopping optimization.'
				model = model_copy
				break

			print 'Copying model...'

			model_copy = deepcopy(model)

		# save intermediate results after every iteration
		experiment['batch_size'] = batch_size
		experiment['args'] = args
		experiment['model'] = model
		experiment['loss_valid'] = loss_valid
		experiment['loss'] = hstack(loss) if len(loss) > 0 else []
		# NOTE(review): the '{0}.{1}' placeholders are presumably filled in
		# by Experiment.save -- verify against its implementation
		experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

	return 0
Example no. 7
0
def main(argv):
	"""Sample an image from a trained RIDE model and write it to disk.

	The image is initialized with white noise -- either plain Gaussian noise
	or noise whose marginal distribution is matched to a data set given via
	``--data`` -- then sampled from the model and cropped/clipped for output.
	"""
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('model',            type=str)
	parser.add_argument('--num_rows', '-r', type=int, default=256)
	parser.add_argument('--num_cols', '-c', type=int, default=256)
	parser.add_argument('--data',     '-d', type=str, default=None)
	parser.add_argument('--log',      '-L', type=int, default=0)
	parser.add_argument('--output',   '-o', type=str, default='sample.png')
	parser.add_argument('--margin',   '-M', type=int, default=8)

	args = parser.parse_args(argv[1:])

	# load the trained model from the experiment file
	model = Experiment(args.model)['model']

	if isinstance(model, PatchRIDE):
		# patch-based model: sample directly, no initialization image needed
		img = model.sample()[0]
		imwrite(args.output, imformat(img, vmin=0, vmax=255, symmetric=False))

	else:
		if args.data is None:
			# initialize image with white noise
			img_init = randn(1,
				args.num_rows + args.margin * 2,
				args.num_cols + args.margin * 2,
				sum(model.num_channels)) / 10.
			img = model.sample(img_init)

			if args.log:
				# linearize and gamma-correct
				img = power(exp(img), .45)

			if args.margin > 0:
				# crop the sampling margin on all four sides
				img = img[:, args.margin:-args.margin, args.margin:-args.margin]

			if img.shape[-1] == 3:
				# RGB: clip to [0, 255] and store as an 8-bit image
				img[img > 255.] = 255.
				img[img < 0.] = 0.
				imwrite(args.output, asarray(img[0, :, :, :], dtype='uint8'))
			else:
				imwrite(args.output, imformat(img[0, :, :, 0], perc=99))

		else:
			# use a data set to match the marginal distribution of the noise
			if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
				data = imread(args.data)[None]
				vmin, vmax = 0, 255
			else:
				data = loadmat(args.data)['data']
				vmin = percentile(data, 0.02)
				vmax = percentile(data, 98.)

			if data.ndim < 4:
				# add a trailing channel dimension
				data = data[:, :, :, None]

			if isinstance(model, MultiscaleRIDE):
				num_channels = 1
			elif isinstance(model, ColorRIDE):
				num_channels = 3
			else:
				num_channels = model.num_channels

			# NOTE(review): rows get a single margin while columns get two;
			# this matches the asymmetric crop further down -- confirm intentional
			num_pixels = (args.num_rows + args.margin) * (args.num_cols + args.margin * 2)

			# initialize image with white noise (but correct marginal distribution)
			img_init = []
			for c in range(num_channels):
				indices = randint(data.size // num_channels, size=num_pixels)
				img_init.append(
					asarray(data[:, :, :, c].ravel()[indices], dtype=float).reshape(
					1,
					args.num_rows + args.margin,
					args.num_cols + args.margin * 2, 1))
			img_init = concatenate(img_init, 3)

			# clamp the initialization to the data range
			img_init[img_init < vmin] = vmin
			img_init[img_init > vmax] = vmax

			if isinstance(model, MultiscaleRIDE) or isinstance(model, ColorRIDE):
				# these models sample in a transformed space
				data = model._transform(data)
				idx = randint(data.shape[0])
				img = model.sample(img_init,
	#				min_values=data[idx].min(1).min(0),
	#				max_values=data[idx].max(1).max(0))
					min_values=data.min(2).min(1).min(0),
					max_values=data.max(2).max(1).max(0))
			else:
	#			img_init[:] = img_init.mean()
				img = model.sample(img_init,
					min_values=percentile(data, .1),
					max_values=percentile(data, 99.8))
	#				min_values=percentile(data, 1.),
	#				max_values=percentile(data, 96.))

			if args.log:
				# linearize and gamma-correct
				img = power(exp(img), .45)
				vmin = power(exp(vmin), .45)
				vmax = power(exp(vmax), .45)

			# best effort: additionally store the raw sample as .npz
			try:
				savez(args.output.split('.')[0] + '.npz', sample=img)
			except:
				pass

			if args.margin > 0:
				# asymmetric crop: top margin only for rows, both sides for columns
				img = img[:, args.margin:, args.margin:-args.margin]

			if num_channels == 1:
				imwrite(args.output,
					imformat(img[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))
			else:
				imwrite(args.output,
					imformat(img[0], vmin=vmin, vmax=vmax, symmetric=False))

	return 0
Example no. 8
0
def main(argv):
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--patch_size',
                        '-p',
                        type=int,
                        default=[8, 10, 12, 14, 16, 18, 20, 22],
                        nargs='+')
    parser.add_argument('--row_multiplier',
                        '-R',
                        type=int,
                        default=[1],
                        nargs='+',
                        help='Can be used to train on elongated patches.')
    parser.add_argument('--col_multiplier',
                        '-C',
                        type=int,
                        default=[1],
                        nargs='+',
                        help='Can be used to train on elongated patches.')
    parser.add_argument('--num_patches',
                        '-P',
                        type=int,
                        default=None,
                        help='If given, subsample training data.')
    parser.add_argument(
        '--num_valid',
        '-V',
        type=int,
        default=0,
        help=
        'Number of training images used for validation error based early stopping.'
    )
    parser.add_argument(
        '--finetune',
        '-F',
        type=int,
        default=[1],
        nargs='+',
        help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
    parser.add_argument('--learning_rate',
                        '-l',
                        type=float,
                        default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005],
                        nargs='+')
    parser.add_argument('--momentum',
                        '-m',
                        type=float,
                        default=[.9],
                        nargs='+')
    parser.add_argument('--batch_size',
                        '-B',
                        type=int,
                        default=[50],
                        nargs='+')
    parser.add_argument('--nb_size',
                        '-b',
                        type=int,
                        default=5,
                        help='Size of the causal neighborhood of pixels.')
    parser.add_argument('--num_hiddens', '-n', type=int, default=64)
    parser.add_argument('--num_components', '-c', type=int, default=32)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=32)
    parser.add_argument('--num_epochs', '-e', type=int, default=[1], nargs='+')
    parser.add_argument('--precondition', '-Q', type=int, default=1)
    parser.add_argument('--method', '-M', type=str, default=['SGD'], nargs='+')
    parser.add_argument('--data',
                        '-d',
                        type=str,
                        default='data/deadleaves_train.mat')
    parser.add_argument(
        '--noise',
        '-N',
        type=float,
        default=None,
        help=
        'Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).'
    )
    parser.add_argument(
        '--model',
        '-I',
        type=str,
        default='',
        help=
        'Start with this model as initialization. Other flags will be ignored.'
    )
    parser.add_argument('--add_layer', '-a', type=int, default=[0], nargs='+')
    parser.add_argument('--train_top_layer',
                        '-T',
                        type=int,
                        default=[0],
                        nargs='+')
    parser.add_argument('--train_means',
                        '-S',
                        type=int,
                        default=[0],
                        nargs='+')
    parser.add_argument('--mode',
                        '-q',
                        type=str,
                        default='CPU',
                        choices=['CPU', 'GPU'])
    parser.add_argument('--device', '-D', type=int, default=0)
    parser.add_argument(
        '--augment',
        '-A',
        type=int,
        default=1,
        help='Increase training set size by transforming data.')
    parser.add_argument('--overlap', '-O', type=int, default=[1], nargs='+')
    parser.add_argument('--output',
                        '-o',
                        type=str,
                        default='results/deadleaves/')
    parser.add_argument(
        '--patch_model',
        type=int,
        default=0,
        help='Train a patch-based model instead of a stochastic process.')
    parser.add_argument('--extended',
                        '-X',
                        type=int,
                        default=0,
                        help='Use extended version of spatial LSTM.')
    parser.add_argument(
        '--multiscale',
        '-Y',
        type=int,
        default=0,
        help=
        'Apply recurrent image model to multiscale representation of images.')
    parser.add_argument(
        '--color',
        '-Z',
        type=int,
        default=0,
        help='Use separate models to model color and grayscale values.')

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    if args.mode.upper() == 'GPU':
        caffe.set_mode_gpu()
        caffe.set_device(args.device)
    else:
        caffe.set_mode_cpu()

    # load data
    if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
        data = imread(args.data)[None]
        data += rand(*data.shape)
    else:
        data = loadmat(args.data)['data']

    if args.augment > 0:
        data = vstack([data, data[:, :, ::-1]])
    if args.augment > 1:
        data = vstack([data, data[:, ::-1, :]])

    if args.noise is not None:
        # add noise as a means for regularization
        data += randn(*data.shape) * (std(data, ddof=1) / args.noise)

    if args.num_valid > 0:
        if args.num_valid >= data.shape[0]:
            print 'Cannot use {0} for validation, there are only {1} training images.'.format(
                args.num_valid, data.shape[0])
            return 1

        # select subset for validation
        idx = random_select(args.num_valid, data.shape[0])
        data_valid = data[idx]
        data = asarray([image for i, image in enumerate(data) if i not in idx])

        print '{0} training images'.format(data.shape[0])
        print '{0} validation images'.format(data_valid.shape[0])

        patches_valid = []
        patch_size = min([64, data.shape[1], data.shape[2]])
        for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
            for j in range(0, data_valid.shape[2] - patch_size + 1,
                           patch_size):
                patches_valid.append(data_valid[:, i:i + patch_size,
                                                j:j + patch_size])
        patches_valid = vstack(patches_valid)

    if args.model:
        # load pretrained model
        results = Experiment(args.model)
        model = results['model']
        loss = [results['loss']]

        if args.patch_model and not isinstance(model, PatchRIDE):
            model = PatchRIDE(model=model,
                              num_rows=args.patch_size[0],
                              num_cols=args.patch_size[0])

    else:
        # create recurrent image model
        if args.patch_model:
            model = PatchRIDE(
                num_rows=args.patch_size[0],
                num_cols=args.patch_size[0],
                num_channels=data.shape[-1] if data.ndim > 3 else 1,
                num_hiddens=args.num_hiddens,
                nb_size=args.nb_size,
                num_components=args.num_components,
                num_scales=args.num_scales,
                num_features=args.num_features,
                model_class=ColorRIDE if args.color else RIDE)

            if args.extended:
                print 'Extended patch model not supported.'
                return 0

            if args.multiscale:
                print 'Multiscale patch model not supported.'
                return 0

        else:
            if args.multiscale:
                if data.ndim > 3 and data.shape[-1] > 1:
                    print 'Multiscale color model not supported.'
                    return 0

                model = MultiscaleRIDE(num_hiddens=args.num_hiddens,
                                       nb_size=args.nb_size,
                                       num_components=args.num_components,
                                       num_scales=args.num_scales,
                                       num_features=args.num_features,
                                       extended=args.extended > 0)

            elif args.color:
                if data.ndim < 4 or data.shape[-1] != 3:
                    print 'These images don\'t look like RGB images.'
                    return 0
                model = ColorRIDE(num_hiddens=args.num_hiddens,
                                  nb_size=args.nb_size,
                                  num_components=args.num_components,
                                  num_scales=args.num_scales,
                                  num_features=args.num_features,
                                  extended=args.extended > 0)

            else:
                model = RIDE(
                    num_channels=data.shape[-1] if data.ndim > 3 else 1,
                    num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)

        loss = []

    # compute initial performance
    loss_valid = []

    if args.num_valid > 0:
        print 'Computing validation loss...'
        loss_valid.append(model.evaluate(patches_valid))
        model_copy = deepcopy(model)

    for k, patch_size in enumerate(args.patch_size):
        if args.multiscale:
            patch_size *= 2

        if k < len(args.add_layer):
            for _ in range(args.add_layer[k]):
                # add spatial LSTM to the network
                model.add_layer()

        # extract patches of given patch size
        patches = []

        row_size = patch_size * args.row_multiplier[k %
                                                    len(args.row_multiplier)]
        col_size = patch_size * args.col_multiplier[k %
                                                    len(args.col_multiplier)]

        if isinstance(model, PatchRIDE):
            model.num_rows = row_size
            model.num_cols = col_size

        for i in range(0, data.shape[1] - row_size + 1,
                       row_size / args.overlap[k % len(args.overlap)]):
            for j in range(0, data.shape[2] - col_size + 1,
                           col_size / args.overlap[k % len(args.overlap)]):
                patches.append(data[:, i:i + row_size, j:j + col_size])
        patches = vstack(patches)

        # randomize order of patches
        if args.num_patches is not None and args.num_patches < len(patches):
            patches = patches[random_select(args.num_patches, len(patches))]
        else:
            patches = patches[permutation(len(patches))]

        # determine batch size
        if args.method[k % len(args.method)].upper() == 'SFO':
            num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
            batch_size = patches.shape[0] // num_batches
        else:
            batch_size = args.batch_size[k % len(args.batch_size)]

        if batch_size < 1:
            raise RuntimeError('Too little data.')

        print 'Patch size: {0}x{1}'.format(row_size, col_size)
        print 'Number of patches: {0}'.format(patches.shape[0])
        print 'Batch size: {0}'.format(batch_size)

        # train recurrent image model
        print 'Training...'
        loss.append(
            model.train(
                patches,
                batch_size=batch_size,
                method=args.method[k % len(args.method)],
                num_epochs=args.num_epochs[k % len(args.num_epochs)],
                learning_rate=args.learning_rate[k % len(args.learning_rate)],
                momentum=args.momentum[k % len(args.momentum)],
                precondition=args.precondition > 0,
                train_top_layer=args.train_top_layer[k %
                                                     len(args.train_top_layer)]
                > 0,
                train_means=args.train_means[k % len(args.train_means)] > 0))

        if args.finetune[k % len(args.finetune)]:
            print 'Finetuning...'
            model.finetune(patches, num_samples_train=1000000, max_iter=500)

        if args.num_valid > 0:
            print 'Computing validation loss...'
            loss_valid.append(model.evaluate(patches_valid))

            if loss_valid[-1] > loss_valid[-2]:
                print 'Performance got worse. Stopping optimization.'
                model = model_copy
                break

            print 'Copying model...'

            model_copy = deepcopy(model)

        experiment['batch_size'] = batch_size
        experiment['args'] = args
        experiment['model'] = model
        experiment['loss_valid'] = loss_valid
        experiment['loss'] = hstack(loss) if len(loss) > 0 else []
        experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

    return 0
Exemplo n.º 9
0
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('image',                    type=str)
	parser.add_argument('model',                    type=str)
	parser.add_argument('--init',             '-I', type=str, default=None)
	parser.add_argument('--index',            '-x', type=int, default=0,
		help='Determines which image is used when whole dataset is given instead of image.')
	parser.add_argument('--fill_region',      '-f', type=int, default=71)
	parser.add_argument('--outer_patch_size', '-p', type=int, default=19)
	parser.add_argument('--inner_patch_size', '-i', type=int, default=5)
	parser.add_argument('--stride',           '-s', type=int, default=3)
	parser.add_argument('--candidates',       '-C', type=int, default=5,
		help='The best initialization is taken out of this many initializations.')
	parser.add_argument('--num_epochs',       '-e', type=int, default=1000)
	parser.add_argument('--method',           '-m', type=str, default='SAMPLE', choices=['SAMPLE', 'MAP'])
	parser.add_argument('--step_width',       '-l', type=float, default=100.)
	parser.add_argument('--output',           '-o', type=str, default='results/inpainting/')
	parser.add_argument('--flip',             '-F', type=int, default=0,
		help='If > 0, assume horizontal symmetry. If > 1, assume vertical symmetry.')

	args = parser.parse_args(argv[1:])


	### DATA

	# load image
	if args.image.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
		image = imread(args.image)[None]
		vmin, vmax = 0, 255
	else:
		image = loadmat(args.image)['data'][[args.index]]
		vmin, vmax = image.min(), image.max()
	if image.ndim < 4:
		image = image[:, :, :, None]

	image = asarray(image, dtype=float)

	imwrite(os.path.join(args.output, 'original.png'),
		imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

	# remove center portion
	i_start = (image.shape[1] - args.fill_region) // 2
	j_start = (image.shape[2] - args.fill_region) // 2
	image[0,
		i_start:i_start + args.fill_region,
		j_start:j_start + args.fill_region, 0] = vmin + rand(args.fill_region, args.fill_region) * (vmax - vmin)

	imwrite(os.path.join(args.output, 'start.png'),
		imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))


	### MODEL

	# load model
	model = Experiment(args.model)['model']
	model.verbosity = False

	# use different models for sampling and likelihoods because of SLSTM caching
	model_copy = deepcopy(model)

	# create mask indicating pixels to replace
	M = args.outer_patch_size
	N = args.inner_patch_size
	m = (M - N) // 2
	n = M - N - m
	patch_mask = zeros([M, M], dtype=bool)
	patch_mask[m:-n, m:-n] = True

	if args.init is None:
		candidates = []
		logliks = []

		for _ in range(args.candidates):
			# replace missing pixels by ancestral sampling
			patch = image[:,
				i_start - M:i_start + args.fill_region,
				j_start - M:j_start + args.fill_region + M]
			sample_mask = zeros([patch.shape[1], patch.shape[2]], dtype=bool)
			sample_mask[M:, M:-M] = True
			image[:,
				i_start - M:i_start + args.fill_region,
				j_start - M:j_start + args.fill_region + M] = model.sample(patch, mask=sample_mask,
					min_values=vmin, max_values=vmax)

			candidates.append(image.copy())
			logliks.append(model.loglikelihood(image).sum())

		image = candidates[argmax(logliks)]

		imwrite(os.path.join(args.output, 'fillin.0.png'),
			imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

		start_epoch = 0

	else:
		init = load(args.init)
		image = init['image']
		start_epoch = init['epoch']


	### INPAINTING

	try:
		for epoch in range(start_epoch, args.num_epochs):
			print epoch
			
			h_flipped = False
			if args.flip > 0 and rand() < .5:
				print 'Horizontal flip.'
				# flip image horizontally
				image = image[:, :, ::-1]
				j_start = image.shape[2] - j_start - args.fill_region
				h_flipped = True

			v_flipped = False
			if args.flip > 0 and rand() < .5:
				print 'Vertical flip.'
				# flip image vertically
				image = image[:, ::-1, :]
				i_start = image.shape[1] - i_start - args.fill_region
				v_flipped = True

			for i in range(i_start - m, i_start - m + args.fill_region - N + 1, args.stride):
				for j in range(j_start - m, j_start - m + args.fill_region - N + 1, args.stride):
					patch = image[:, i:i + M, j:j + M]

					if args.method == 'SAMPLE':
						# proposal
						patch_pr, logq_pr = model.sample(patch.copy(), mask=patch_mask,
							min_values=vmin, max_values=vmax, return_loglik=True)

						# conditional log-density
						logq = model_copy._logq(patch, patch_mask)

						# joint log-densities
						logp = model_copy.loglikelihood(patch).sum()
						logp_pr = model_copy.loglikelihood(patch_pr).sum()

						if rand() < exp(logp_pr - logp - logq_pr + logq):
							# accept proposal
							patch[:] = patch_pr

					else:
						# gradient step
						grad = model.gradient(patch)[1]
						patch[:, patch_mask] += grad[:, patch_mask] * args.step_width

			# flip back
			if h_flipped:
				image = image[:, :, ::-1]
				j_start = image.shape[2] - j_start - args.fill_region
			if v_flipped:
				image = image[:, ::-1, :]
				i_start = image.shape[1] - i_start - args.fill_region

			imwrite(os.path.join(args.output, 'fillin.{0}.png'.format(epoch + 1)),
				imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

	except KeyboardInterrupt:
		pass

	savez(os.path.join(args.output, 'final.npz'), image=image, epoch=epoch)

	return 0