def extract(images):
    """Collect causal-neighborhood input/output pairs from every image in a stack.

    `input_mask` and `output_mask` are taken from the enclosing scope.
    Returns two arrays with one column per extracted neighborhood.
    """
    pairs = [generate_data_from_image(images[k], input_mask, output_mask)
             for k in range(images.shape[0])]
    inputs, outputs = zip(*pairs)
    return hstack(inputs), hstack(outputs)
def train_model(img, input_mask, output_mask):
    """Fit an MCGSM to causal neighborhoods sampled from a single image.

    Returns the trained model together with the whitening preconditioner
    used to normalize the data.
    """
    # draw 120000 causal-neighborhood pairs from the image
    inputs, outputs = generate_data_from_image(img, input_mask, output_mask, 120000)

    # the first 100000 samples train the model, the remainder validate it
    data_train = (inputs[:, :100000], outputs[:, :100000])
    data_valid = (inputs[:, 100000:], outputs[:, 100000:])

    # normalizing transformation estimated on the training data
    preconditioner = WhiteningPreconditioner(*data_train)

    # initialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=30)

    # fit parameters; training monitors the validation set
    model.initialize(*preconditioner(*data_train))
    model.train(*chain(preconditioner(*data_train), preconditioner(*data_valid)),
        parameters={
            'verbosity': 1,
            'max_iter': 1000,
            'threshold': 1e-7,
            'val_iter': 5,
            'val_look_ahead': 10,
            'num_grad': 20,
        })

    return model, preconditioner
def train_model(img, input_mask, output_mask):
    """Train an MCGSM on neighborhoods of one image and return (model, preconditioner)."""
    # generate data
    inputs, outputs = generate_data_from_image(
        img, input_mask, output_mask, 120000)

    # split data into training and validation sets at the 100000th sample
    split_at = 100000
    data_train = inputs[:, :split_at], outputs[:, :split_at]
    data_valid = inputs[:, split_at:], outputs[:, split_at:]

    # compute normalizing transformation
    pre = WhiteningPreconditioner(*data_train)

    # initialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=30)

    # optimization settings for model.train
    training_parameters = {
        'verbosity': 1,
        'max_iter': 1000,
        'threshold': 1e-7,
        'val_iter': 5,
        'val_look_ahead': 10,
        'num_grad': 20,
    }

    # fit parameters
    model.initialize(*pre(*data_train))
    model.train(*chain(pre(*data_train), pre(*data_valid)),
        parameters=training_parameters)

    return model, pre
def main(argv):
    """Train an MCGSM on a grayscale image, report its likelihood and sample from it."""
    # load image and turn into grayscale
    img = rgb2gray(imread('media/newyork.png'))

    # draw 220000 causal-neighborhood pairs from the image
    inputs, outputs = generate_data_from_image(
        img, input_mask, output_mask, 220000)

    # split data into training, test, and validation sets
    inputs = split(inputs, [100000, 200000], 1)
    outputs = split(outputs, [100000, 200000], 1)
    data_train, data_test, data_valid = zip(inputs, outputs)

    # compute normalizing transformation
    pre = WhiteningPreconditioner(*data_train)

    # initialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=32)

    # fit parameters; validation data is used for monitoring
    model.initialize(*pre(*data_train))
    model.train(*chain(pre(*data_train), pre(*data_valid)),
        parameters={
            'verbosity': 1,
            'max_iter': 1000,
            'threshold': 1e-7,
            'val_iter': 5,
            'val_look_ahead': 10,
            'num_grad': 20,
        })

    # evaluate model on held-out test data
    print('Average log-likelihood: {0:.4f} [bit/px]'.format(
        -model.evaluate(data_test[0], data_test[1], pre)))

    # synthesize a new image
    img_sample = sample_image(img, model, input_mask, output_mask, pre)

    imwrite('newyork_sample.png', img_sample,
        cmap='gray',
        vmin=min(img),
        vmax=max(img))

    # save model (pickle protocol 1)
    with open('image_model.pck', 'wb') as handle:
        dump({
            'model': model,
            'input_mask': input_mask,
            'output_mask': output_mask}, handle, 1)

    return 0
def process(image):
    """Reshape causal-neighborhood data of one image into spatial float32 maps.

    Returns (inputs, outputs), each of shape (rows, cols, -1) where rows/cols
    is the number of valid neighborhood positions along each image axis.
    """
    # NOTE(review): `self` is taken from the enclosing scope — presumably this
    # function is defined inside a method; confirm against the full file.
    inputs, outputs = generate_data_from_image(
        image, self.input_mask, self.output_mask)

    # number of valid neighborhood positions along each axis
    num_rows = image.shape[0] - self.input_mask.shape[0] + 1
    num_cols = image.shape[1] - self.input_mask.shape[1] + 1

    inputs = asarray(inputs.T.reshape(num_rows, num_cols, -1), dtype='float32')
    outputs = asarray(outputs.T.reshape(num_rows, num_cols, -1), dtype='float32')

    return inputs, outputs
def process(image):
    """Convert one image into spatially arranged float32 input/output maps."""
    # NOTE(review): `self` comes from the enclosing scope — presumably a closure
    # inside a method; confirm against the full file.
    inputs, outputs = generate_data_from_image(
        image, self.input_mask, self.output_mask)

    # spatial extent of the valid neighborhood positions
    height = image.shape[0] - self.input_mask.shape[0] + 1
    width = image.shape[1] - self.input_mask.shape[1] + 1

    inputs = asarray(
        inputs.T.reshape(height, width, -1), dtype='float32')
    outputs = asarray(
        outputs.T.reshape(height, width, -1), dtype='float32')

    return inputs, outputs
def main(argv):
    """Evaluate a stored model on causal neighborhoods of held-out images."""
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('model', type=str)
    parser.add_argument('--num_data', '-N', type=int, default=5000000)
    parser.add_argument('--data', '-d', type=str,
        default='data/vanhateren_deq2_test.mat')
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    args = parser.parse_args(argv[1:])

    ### LOAD RESULTS

    experiment = Experiment(args.model)

    ### DATA HANDLING

    if args.verbosity > 0:
        print('Loading data...')

    # load data
    images = loadmat(args.data)['data']

    # causal neighborhood definition
    input_mask = experiment['input_mask']
    output_mask = experiment['output_mask']

    # extract the same number of neighborhoods from every image
    num_samples = args.num_data // images.shape[0]
    pairs = [generate_data_from_image(images[k], input_mask, output_mask, num_samples)
             for k in range(images.shape[0])]
    inputs, outputs = zip(*pairs)
    inputs = hstack(inputs)
    outputs = hstack(outputs)

    ### MODEL EVALUATION

    crossentropy = experiment['model'].evaluate(
        inputs, outputs, experiment['preconditioner'])

    print('Cross-entropy: {0:.4f} [bit/px]'.format(crossentropy))

    return 0
def main(argv):
    """Compute a stored model's cross-entropy on held-out image neighborhoods."""
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('model', type=str)
    parser.add_argument('--num_data', '-N', type=int, default=5000000)
    parser.add_argument('--data', '-d', type=str,
        default='data/vanhateren_deq2_test.mat')
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    args = parser.parse_args(argv[1:])

    ### LOAD RESULTS

    experiment = Experiment(args.model)

    ### DATA HANDLING

    if args.verbosity > 0:
        print('Loading data...')

    # load data
    images = loadmat(args.data)['data']

    # causal neighborhood definition
    input_mask = experiment['input_mask']
    output_mask = experiment['output_mask']

    # extract causal neighborhoods, the same number from each image
    samples_per_image = args.num_data // images.shape[0]
    all_inputs = []
    all_outputs = []
    for idx in range(images.shape[0]):
        inp, outp = generate_data_from_image(
            images[idx], input_mask, output_mask, samples_per_image)
        all_inputs.append(inp)
        all_outputs.append(outp)
    inputs = hstack(all_inputs)
    outputs = hstack(all_outputs)

    ### MODEL EVALUATION

    crossentropy = experiment['model'].evaluate(
        inputs, outputs, experiment['preconditioner'])

    print('Cross-entropy: {0:.4f} [bit/px]'.format(crossentropy))

    return 0
def extract(image):
    """Sample input patches from one image; the corresponding outputs are discarded.

    `input_mask`, `output_mask` and `num_samples_per_image` come from the
    enclosing scope.
    """
    # element [0] keeps only the inputs of the (inputs, outputs) pair
    return generate_data_from_image(
        image, input_mask, output_mask, num_samples_per_image)[0]
def main(argv):
    """Fit an MCGSM to one image, evaluate it, sample a new image, and save everything."""
    # load image and turn into grayscale
    img = rgb2gray(imread('media/newyork.png'))

    # generate data
    inputs, outputs = generate_data_from_image(img, input_mask, output_mask, 220000)

    # split data into training, test, and validation sets
    inputs = split(inputs, [100000, 200000], 1)
    outputs = split(outputs, [100000, 200000], 1)
    data_train = inputs[0], outputs[0]
    data_test = inputs[1], outputs[1]
    data_valid = inputs[2], outputs[2]

    # compute normalizing transformation
    pre = WhiteningPreconditioner(*data_train)

    # initialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=32)

    # optimization settings
    training_parameters = {
        'verbosity': 1,
        'max_iter': 1000,
        'threshold': 1e-7,
        'val_iter': 5,
        'val_look_ahead': 10,
        'num_grad': 20,
    }

    # fit parameters
    model.initialize(*pre(*data_train))
    model.train(*chain(pre(*data_train), pre(*data_valid)),
        parameters=training_parameters)

    # evaluate model on the held-out test split
    print('Average log-likelihood: {0:.4f} [bit/px]'.format(
        -model.evaluate(data_test[0], data_test[1], pre)))

    # synthesize a new image
    img_sample = sample_image(img, model, input_mask, output_mask, pre)

    imwrite('newyork_sample.png', img_sample,
        cmap='gray',
        vmin=min(img),
        vmax=max(img))

    # save model together with the masks (pickle protocol 1)
    with open('image_model.pck', 'wb') as handle:
        dump(
            {
                'model': model,
                'input_mask': input_mask,
                'output_mask': output_mask
            }, handle, 1)

    return 0
def sample(self, images, min_values=None, max_values=None):
    """
    Sample one or several images.

    Pixels are generated one position at a time in raster order: for every
    position, a causal neighborhood is extracted, pushed through the stack
    of spatial LSTMs, and the output pixel is drawn from the MCGSM.

    @type  images: C{ndarray}
    @param images: an array or a list of images to initialize pixels at boundaries
    """
    # optional per-channel clipping bounds, broadcastable against outputs
    if min_values is not None:
        min_values = asarray(min_values).reshape(1, 1, 1, -1)
    if max_values is not None:
        max_values = asarray(max_values).reshape(1, 1, 1, -1)

    # reshape images into four-dimensional arrays (batch, rows, cols, channels)
    shape = images.shape
    if images.ndim == 2:
        images = images[None, :, :, None]
    elif images.ndim == 3:
        if self.num_channels > 1:
            images = images[None]
        else:
            images = images[:, :, :, None]

    # create 1x1 spatial LSTMs for sampling, reusing the trained parameters
    slstm = []
    for l in range(self.num_layers):
        slstm.append(
            SLSTM(num_rows=1,
                num_cols=1,
                num_channels=sum(self.input_mask) if l < 1 else self.num_hiddens,
                num_hiddens=self.num_hiddens,
                batch_size=images.shape[0],
                nonlinearity=self.nonlinearity,
                extended=self.extended,
                slstm=self.slstm[l],
                verbosity=self.verbosity))

    # container for hidden and memory unit activations; defaults to zero
    # outside the already-visited region
    hiddens = []
    memory = []
    for l in range(self.num_layers):
        hiddens.append(defaultdict(lambda: 0.))
        memory.append(defaultdict(lambda: 0.))

    # locate output pixel
    # NOTE(review): zip over both ranges walks the mask's diagonal only —
    # presumably the output pixel lies on the diagonal; confirm.
    for i_off, j_off in zip(range(self.output_mask.shape[0]),
            range(self.output_mask.shape[1])):
        if any(self.output_mask[i_off, j_off]):
            break

    for i in range(images.shape[1] - self.input_mask.shape[0] + 1):
        for j in range(images.shape[2] - self.input_mask.shape[1] + 1):
            # extract patches from images
            patches = images[:, i:i + self.input_mask.shape[0], j:j + self.input_mask.shape[1]]

            # extract causal neighborhoods from patches
            inputs = []
            for k in range(images.shape[0]):
                inputs.append(
                    generate_data_from_image(patches[k, :, :], self.input_mask, self.output_mask)[0])
            inputs = asarray(inputs)
            inputs = inputs.reshape(inputs.shape[0], 1, 1, -1)

            if self.preconditioner:
                inputs = self._precondition(inputs)

            # seed the 1x1 LSTMs with activations of the left and upper neighbors
            for l in range(self.num_layers):
                slstm[l].net.blobs['h_init_i_jm1'].data[:] = hiddens[l][i, j - 1]
                slstm[l].net.blobs['h_init_im1_j'].data[:] = hiddens[l][i - 1, j]
                slstm[l].net.blobs['c_init_i_jm1'].data[:] = memory[l][i, j - 1]
                slstm[l].net.blobs['c_init_im1_j'].data[:] = memory[l][i - 1, j]

            # compute hidden unit activations layer by layer
            activations = inputs
            for l in range(self.num_layers):
                activations = slstm[l].forward(activations)

            # store hidden unit activations for use at later positions
            for l in range(self.num_layers):
                hiddens[l][i, j] = slstm[l].net.blobs['outputs'].data.copy()
                memory[l][i, j] = slstm[l].net.blobs['c_0_0'].data.copy()

            # retry up to 10 times if the MCGSM produces NaNs
            for _ in range(10):
                # sample MCGSM
                outputs = self.mcgsm.sample(hiddens[-1][i, j].reshape(
                    -1, self.num_hiddens).T)
                outputs = outputs.T.reshape(outputs.shape[1], 1, 1, outputs.shape[0])
                if not any(isnan(outputs)):
                    break
                print 'Warning: NaNs detected.'

            if self.preconditioner:
                # map sampled values back to pixel space
                inputs, outputs = self._precondition_inverse(
                    inputs, outputs)

            # clip outputs to the requested bounds, if any
            if max_values is not None:
                outputs[outputs > max_values] = max_values[
                    outputs > max_values]
            if min_values is not None:
                outputs[outputs < min_values] = min_values[
                    outputs < min_values]

            # insert sampled pixels into images (assignment into a view of `images`)
            images[:, i + i_off, j + j_off][self.output_mask[i_off, j_off]] = outputs

    # restore the caller's original array shape
    return images.reshape(*shape)
def test_generate_data_from_image(self):
    """Check neighborhood extraction for 2D/3D images, multi-channel masks,
    overlapping-mask errors, and exhaustive extraction/reconstruction."""
    # 2x2 causal input mask and single output pixel
    xmask = asarray([
        [1, 1],
        [1, 0]], dtype='bool')
    ymask = asarray([
        [0, 0],
        [0, 1]], dtype='bool')
    img = asarray([
        [1., 2.],
        [3., 4.]])

    inputs, outputs = generate_data_from_image(img, xmask, ymask, 1)

    # the single extractable neighborhood must match the image values exactly
    self.assertLess(max(abs(inputs - [[1.], [2.], [3.]])), 1e-10)
    self.assertLess(max(abs(outputs - [[4.]])), 1e-10)

    # grayscale image: 3 input pixels, 1 output pixel per sample
    inputs, outputs = generate_data_from_image(randn(512, 512), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 3)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 1)
    self.assertEqual(outputs.shape[1], 100)

    # two-channel image doubles the number of extracted values
    inputs, outputs = generate_data_from_image(randn(512, 512, 2), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 6)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 2)
    self.assertEqual(outputs.shape[1], 100)

    # multi-channel masks
    xmask = dstack([
        asarray([
            [1, 1],
            [1, 0]], dtype='bool'),
        asarray([
            [1, 1],
            [1, 0]], dtype='bool')])
    ymask = dstack([
        asarray([
            [0, 0],
            [0, 1]], dtype='bool'),
        asarray([
            [0, 0],
            [0, 1]], dtype='bool')])

    inputs, outputs = generate_data_from_image(randn(512, 512, 2), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 6)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 2)
    self.assertEqual(outputs.shape[1], 100)

    # invalid masks due to overlap
    xmask = asarray([
        [1, 1],
        [1, 1]], dtype='bool')
    ymask = asarray([
        [0, 0],
        [0, 1]], dtype='bool')

    self.assertRaises(Exception, generate_data_from_image, img, xmask, ymask, 1)

    # taller (3x2) masks for the exhaustive-extraction checks below
    xmask = asarray([
        [1, 1],
        [1, 1],
        [1, 0]], dtype='bool')
    ymask = asarray([
        [0, 0],
        [0, 0],
        [0, 1]], dtype='bool')

    # test extracting of all possible inputs and outputs
    img = randn(64, 64)
    inputs, outputs = generate_data_from_image(img, xmask, ymask)

    # try reconstructing image from outputs
    self.assertLess(max(abs(outputs.reshape(62, 63, order='C') - img[2:, 1:])), 1e-16)

    # same reconstruction for a three-channel image
    img = randn(64, 64, 3)
    inputs, outputs = generate_data_from_image(img, xmask, ymask)
    img_rec = outputs.reshape(3,
        62, 63, order='C')
    img_rec = transpose(img_rec, [1, 2, 0])

    self.assertLess(max(abs(img_rec - img[2:, 1:])), 1e-16)
def extract(image):
    """Draw causal-neighborhood input/output pairs from one image.

    `input_mask`, `output_mask` and `num_samples` come from the enclosing scope.
    """
    data = generate_data_from_image(image, input_mask, output_mask, num_samples)
    return data
def sample(self, images, min_values=None, max_values=None):
    """
    Sample one or several images.

    Generates pixels sequentially in raster order by feeding each causal
    neighborhood through the spatial-LSTM stack and drawing the output
    pixel from the MCGSM.

    @type  images: C{ndarray}
    @param images: an array or a list of images to initialize pixels at boundaries
    """
    # optional per-channel bounds used to clip the sampled values
    if min_values is not None:
        min_values = asarray(min_values).reshape(1, 1, 1, -1)
    if max_values is not None:
        max_values = asarray(max_values).reshape(1, 1, 1, -1)

    # reshape images into four-dimensional arrays (batch, rows, cols, channels)
    shape = images.shape
    if images.ndim == 2:
        images = images[None, :, :, None]
    elif images.ndim == 3:
        if self.num_channels > 1:
            images = images[None]
        else:
            images = images[:, :, :, None]

    # create 1x1 spatial LSTMs for sampling, initialized from the trained ones
    slstm = []
    for l in range(self.num_layers):
        slstm.append(SLSTM(
            num_rows=1,
            num_cols=1,
            num_channels=sum(self.input_mask) if l < 1 else self.num_hiddens,
            num_hiddens=self.num_hiddens,
            batch_size=images.shape[0],
            nonlinearity=self.nonlinearity,
            extended=self.extended,
            slstm=self.slstm[l],
            verbosity=self.verbosity))

    # container for hidden and memory unit activations; zero outside the
    # already-visited region
    hiddens = []
    memory = []
    for l in range(self.num_layers):
        hiddens.append(defaultdict(lambda: 0.))
        memory.append(defaultdict(lambda: 0.))

    # locate output pixel
    # NOTE(review): zipping both ranges scans the diagonal only — presumably
    # the output pixel lies on the mask diagonal; confirm.
    for i_off, j_off in zip(
            range(self.output_mask.shape[0]),
            range(self.output_mask.shape[1])):
        if any(self.output_mask[i_off, j_off]):
            break

    for i in range(images.shape[1] - self.input_mask.shape[0] + 1):
        for j in range(images.shape[2] - self.input_mask.shape[1] + 1):
            # extract patches from images
            patches = images[:, i:i + self.input_mask.shape[0], j:j + self.input_mask.shape[1]]

            # extract causal neighborhoods from patches
            inputs = []
            for k in range(images.shape[0]):
                inputs.append(
                    generate_data_from_image(
                        patches[k, :, :], self.input_mask, self.output_mask)[0])
            inputs = asarray(inputs)
            inputs = inputs.reshape(inputs.shape[0], 1, 1, -1)

            if self.preconditioner:
                inputs = self._precondition(inputs)

            # seed the LSTMs with the left and upper neighbors' activations
            for l in range(self.num_layers):
                slstm[l].net.blobs['h_init_i_jm1'].data[:] = hiddens[l][i, j - 1]
                slstm[l].net.blobs['h_init_im1_j'].data[:] = hiddens[l][i - 1, j]
                slstm[l].net.blobs['c_init_i_jm1'].data[:] = memory[l][i, j - 1]
                slstm[l].net.blobs['c_init_im1_j'].data[:] = memory[l][i - 1, j]

            # compute hidden unit activations layer by layer
            activations = inputs
            for l in range(self.num_layers):
                activations = slstm[l].forward(activations)

            # store hidden unit activations for later positions
            for l in range(self.num_layers):
                hiddens[l][i, j] = slstm[l].net.blobs['outputs'].data.copy()
                memory[l][i, j] = slstm[l].net.blobs['c_0_0'].data.copy()

            # retry up to 10 times if the MCGSM yields NaNs
            for _ in range(10):
                # sample MCGSM
                outputs = self.mcgsm.sample(
                    hiddens[-1][i, j].reshape(-1, self.num_hiddens).T)
                outputs = outputs.T.reshape(outputs.shape[1], 1, 1, outputs.shape[0])
                if not any(isnan(outputs)):
                    break
                print 'Warning: NaNs detected.'

            if self.preconditioner:
                # map whitened samples back to pixel space
                inputs, outputs = self._precondition_inverse(inputs, outputs)

            # clip to the optional bounds
            if max_values is not None:
                outputs[outputs > max_values] = max_values[outputs > max_values]
            if min_values is not None:
                outputs[outputs < min_values] = min_values[outputs < min_values]

            # insert sampled pixels into images (writes into a view of `images`)
            images[:, i + i_off, j + j_off][self.output_mask[i_off, j_off]] = outputs

    # restore the caller's original shape
    return images.reshape(*shape)
def extract(image):
    """Sample neighborhood pairs from the given image.

    Relies on `input_mask`, `output_mask` and `num_samples` from the
    enclosing scope.
    """
    pair = generate_data_from_image(image, input_mask, output_mask, num_samples)
    return pair
def test_generate_data_from_image(self):
    """Exercise generate_data_from_image on 2D/3D images, multi-channel
    masks, invalid overlapping masks, and full extraction/reconstruction."""
    # minimal 2x2 case with a known answer
    xmask = asarray([[1, 1], [1, 0]], dtype='bool')
    ymask = asarray([[0, 0], [0, 1]], dtype='bool')
    img = asarray([[1., 2.], [3., 4.]])

    inputs, outputs = generate_data_from_image(img, xmask, ymask, 1)

    # the only extractable neighborhood must reproduce the image values
    self.assertLess(max(abs(inputs - [[1.], [2.], [3.]])), 1e-10)
    self.assertLess(max(abs(outputs - [[4.]])), 1e-10)

    # grayscale image: 3 input pixels and 1 output pixel per sample
    inputs, outputs = generate_data_from_image(randn(512, 512), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 3)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 1)
    self.assertEqual(outputs.shape[1], 100)

    # two channels double the extracted dimensionality
    inputs, outputs = generate_data_from_image(randn(512, 512, 2), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 6)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 2)
    self.assertEqual(outputs.shape[1], 100)

    # multi-channel masks
    xmask = dstack([
        asarray([[1, 1], [1, 0]], dtype='bool'),
        asarray([[1, 1], [1, 0]], dtype='bool')
    ])
    ymask = dstack([
        asarray([[0, 0], [0, 1]], dtype='bool'),
        asarray([[0, 0], [0, 1]], dtype='bool')
    ])

    inputs, outputs = generate_data_from_image(randn(512, 512, 2), xmask, ymask, 100)

    self.assertEqual(inputs.shape[0], 6)
    self.assertEqual(inputs.shape[1], 100)
    self.assertEqual(outputs.shape[0], 2)
    self.assertEqual(outputs.shape[1], 100)

    # invalid masks due to overlap
    xmask = asarray([[1, 1], [1, 1]], dtype='bool')
    ymask = asarray([[0, 0], [0, 1]], dtype='bool')

    self.assertRaises(Exception, generate_data_from_image, img, xmask, ymask, 1)

    # taller (3x2) masks for the exhaustive-extraction checks below
    xmask = asarray([[1, 1], [1, 1], [1, 0]], dtype='bool')
    ymask = asarray([[0, 0], [0, 0], [0, 1]], dtype='bool')

    # test extracting of all possible inputs and outputs
    img = randn(64, 64)
    inputs, outputs = generate_data_from_image(img, xmask, ymask)

    # try reconstructing image from outputs
    self.assertLess(
        max(abs(outputs.reshape(62, 63, order='C') - img[2:, 1:])), 1e-16)

    # same reconstruction for a three-channel image
    img = randn(64, 64, 3)
    inputs, outputs = generate_data_from_image(img, xmask, ymask)
    img_rec = outputs.reshape(3, 62, 63,
        order='C')
    img_rec = transpose(img_rec, [1, 2, 0])

    self.assertLess(max(abs(img_rec - img[2:, 1:])), 1e-16)
def sample(self,
        images,
        min_values=None,
        max_values=None,
        mask=None,
        return_loglik=False):
    """
    Sample one or several images.

    Pixels are generated sequentially in raster order; positions excluded
    by `mask` keep their initial values but still update the LSTM state.

    @type  images: C{ndarray}/C{list}
    @param images: an array or a list of images to initialize pixels at boundaries

    @type  min_values: C{ndarray}/C{list}
    @param min_values: list of lower bounds for each channel (for increased stability)

    @type  max_values: C{ndarray}/C{list}
    @param max_values: list of upper bounds for each channel (for increased stability)

    @type  mask: C{ndarray}
    @param mask: replace only certain pixels indicated by this Boolean mask

    @rtype: C{ndarray}
    @return: sampled images of the size of the images given as input
    """
    # reshape images into four-dimensional arrays (batch, rows, cols, channels)
    shape = images.shape
    if images.ndim == 2:
        images = images[None, :, :, None]
    elif images.ndim == 3:
        if self.num_channels > 1:
            images = images[None]
        else:
            images = images[:, :, :, None]

    # create spatial LSTMs for sampling; rebuild only if shape/batch changed
    for l in range(self.num_layers):
        if self.slstm[l].num_rows != 1 \
                or self.slstm[l].num_cols != 1 \
                or self.slstm[l].batch_size != images.shape[0]:
            self.slstm[l] = SLSTM(
                num_rows=1,
                num_cols=1,
                num_channels=sum(self.input_mask) if l < 1 else self.num_hiddens,
                num_hiddens=self.num_hiddens,
                batch_size=images.shape[0],
                nonlinearity=self.nonlinearity,
                slstm=self.slstm[l],
                extended=self.extended)

    # container for hidden and memory unit activations; zero outside the
    # already-visited region
    hiddens = []
    memory = []
    for l in range(self.num_layers):
        hiddens.append(defaultdict(lambda: 0.))
        memory.append(defaultdict(lambda: 0.))

    # locate output pixel
    # NOTE(review): zipping both ranges scans the diagonal only — presumably
    # the output pixel lies on the mask diagonal; confirm.
    for i_off, j_off in zip(range(self.output_mask.shape[0]),
            range(self.output_mask.shape[1])):
        if any(self.output_mask[i_off, j_off]):
            break

    # per-channel clipping bounds, restricted to output channels if needed
    if min_values is not None:
        min_values = asarray(min_values).reshape(1, 1, 1, -1)
        if self.output_mask.ndim > 2:
            min_values = min_values[:, :, :, self.output_mask[i_off, j_off]]
    if max_values is not None:
        max_values = asarray(max_values).reshape(1, 1, 1, -1)
        if self.output_mask.ndim > 2:
            max_values = max_values[:, :, :,
                self.output_mask[i_off, j_off]]

    # unnormalized log-density of generated sample
    logq = 0.

    for i in range(images.shape[1] - self.input_mask.shape[0] + 1):
        for j in range(images.shape[2] - self.input_mask.shape[1] + 1):
            # extract patches from images
            patches = images[:, i:i + self.input_mask.shape[0], j:j + self.input_mask.shape[1]]

            # extract causal neighborhoods from patches
            inputs = []
            for k in range(images.shape[0]):
                inputs.append(
                    generate_data_from_image(patches[k, :, :], self.input_mask, self.output_mask)[0])
            inputs = asarray(inputs)
            inputs = inputs.reshape(inputs.shape[0], 1, 1, -1)

            if self.preconditioner:
                inputs = self._precondition(inputs)

            # seed the LSTMs with the left and upper neighbors' activations
            for l in range(self.num_layers):
                self.slstm[l].net.blobs['h_init_i_jm1'].data[:] = hiddens[l][i, j - 1]
                self.slstm[l].net.blobs['h_init_im1_j'].data[:] = hiddens[l][i - 1, j]
                self.slstm[l].net.blobs['c_init_i_jm1'].data[:] = memory[l][i, j - 1]
                self.slstm[l].net.blobs['c_init_im1_j'].data[:] = memory[l][i - 1, j]

            # compute hidden unit activations layer by layer
            activations = inputs
            for l in range(self.num_layers):
                activations = self.slstm[l].forward(activations)

            # store hidden unit activations for later positions
            for l in range(self.num_layers):
                hiddens[l][i, j] = self.slstm[l].net.blobs['outputs'].data.copy()
                memory[l][i, j] = self.slstm[l].net.blobs['c_0_0'].data.copy()

            if mask is not None and not mask[i + i_off, j + j_off]:
                # skip sampling of this pixel
                continue

            # retry up to 10 times if the MCGSM yields NaNs
            for _ in range(10):
                # sample MCGSM
                outputs = self.mcgsm.sample(hiddens[-1][i, j].reshape(
                    -1, self.num_hiddens).T)
                if not any(isnan(outputs)):
                    break
                print 'Warning: NaNs detected.'

            if return_loglik:
                # accumulate the log-density of the drawn sample
                logq += self.mcgsm.loglikelihood(
                    hiddens[-1][i, j].reshape(-1, self.num_hiddens).T, outputs)

            outputs = outputs.T.reshape(outputs.shape[1], 1, 1, outputs.shape[0])

            if self.preconditioner:
                # map whitened samples back to pixel space
                inputs, outputs = self._precondition_inverse(
                    inputs, outputs)

            # clip to the optional bounds
            if max_values is not None:
                outputs[outputs > max_values] = max_values[
                    outputs > max_values]
            if min_values is not None:
                outputs[outputs < min_values] = min_values[
                    outputs < min_values]

            # insert sampled pixels into images (writes into a view of `images`)
            if self.output_mask.ndim > 2:
                images[:, i + i_off, j + j_off][:, self.output_mask[i_off, j_off]] = outputs
            else:
                images[:, i + i_off, j + j_off] = outputs

    # restore the caller's original shape
    images = images.reshape(*shape)

    if return_loglik:
        return images, logq

    return images
def sample(self,
        images,
        min_values=None,
        max_values=None,
        mask=None,
        return_loglik=False):
    """
    Sample one or several images.

    Pixels are generated sequentially in raster order; positions excluded
    by `mask` keep their initial values but still update the LSTM state.

    @type  images: C{ndarray}/C{list}
    @param images: an array or a list of images to initialize pixels at boundaries

    @type  min_values: C{ndarray}/C{list}
    @param min_values: list of lower bounds for each channel (for increased stability)

    @type  max_values: C{ndarray}/C{list}
    @param max_values: list of upper bounds for each channel (for increased stability)

    @type  mask: C{ndarray}
    @param mask: replace only certain pixels indicated by this Boolean mask

    @rtype: C{ndarray}
    @return: sampled images of the size of the images given as input
    """
    # reshape images into four-dimensional arrays (batch, rows, cols, channels)
    shape = images.shape
    if images.ndim == 2:
        images = images[None, :, :, None]
    elif images.ndim == 3:
        if self.num_channels > 1:
            images = images[None]
        else:
            images = images[:, :, :, None]

    # create spatial LSTMs for sampling; rebuild only if shape/batch changed
    for l in range(self.num_layers):
        if (
            self.slstm[l].num_rows != 1
            or self.slstm[l].num_cols != 1
            or self.slstm[l].batch_size != images.shape[0]
        ):
            self.slstm[l] = SLSTM(
                num_rows=1,
                num_cols=1,
                num_channels=sum(self.input_mask) if l < 1 else self.num_hiddens,
                num_hiddens=self.num_hiddens,
                batch_size=images.shape[0],
                nonlinearity=self.nonlinearity,
                slstm=self.slstm[l],
                extended=self.extended,
            )

    # container for hidden and memory unit activations; zero outside the
    # already-visited region
    hiddens = []
    memory = []
    for l in range(self.num_layers):
        hiddens.append(defaultdict(lambda: 0.0))
        memory.append(defaultdict(lambda: 0.0))

    # locate output pixel
    # NOTE(review): zipping both ranges scans the diagonal only — presumably
    # the output pixel lies on the mask diagonal; confirm.
    for i_off, j_off in zip(range(self.output_mask.shape[0]),
            range(self.output_mask.shape[1])):
        if any(self.output_mask[i_off, j_off]):
            break

    # per-channel clipping bounds, restricted to output channels if needed
    if min_values is not None:
        min_values = asarray(min_values).reshape(1, 1, 1, -1)
        if self.output_mask.ndim > 2:
            min_values = min_values[:, :, :, self.output_mask[i_off, j_off]]
    if max_values is not None:
        max_values = asarray(max_values).reshape(1, 1, 1, -1)
        if self.output_mask.ndim > 2:
            max_values = max_values[:, :, :,
                self.output_mask[i_off, j_off]]

    # unnormalized log-density of generated sample
    logq = 0.0

    for i in range(images.shape[1] - self.input_mask.shape[0] + 1):
        for j in range(images.shape[2] - self.input_mask.shape[1] + 1):
            # extract patches from images
            patches = images[:, i:i + self.input_mask.shape[0], j:j + self.input_mask.shape[1]]

            # extract causal neighborhoods from patches
            inputs = []
            for k in range(images.shape[0]):
                inputs.append(generate_data_from_image(patches[k, :, :], self.input_mask, self.output_mask)[0])
            inputs = asarray(inputs)
            inputs = inputs.reshape(inputs.shape[0], 1, 1, -1)

            if self.preconditioner:
                inputs = self._precondition(inputs)

            # seed the LSTMs with the left and upper neighbors' activations
            for l in range(self.num_layers):
                self.slstm[l].net.blobs["h_init_i_jm1"].data[:] = hiddens[l][i, j - 1]
                self.slstm[l].net.blobs["h_init_im1_j"].data[:] = hiddens[l][i - 1, j]
                self.slstm[l].net.blobs["c_init_i_jm1"].data[:] = memory[l][i, j - 1]
                self.slstm[l].net.blobs["c_init_im1_j"].data[:] = memory[l][i - 1, j]

            # compute hidden unit activations layer by layer
            activations = inputs
            for l in range(self.num_layers):
                activations = self.slstm[l].forward(activations)

            # store hidden unit activations for later positions
            for l in range(self.num_layers):
                hiddens[l][i, j] = self.slstm[l].net.blobs["outputs"].data.copy()
                memory[l][i, j] = self.slstm[l].net.blobs["c_0_0"].data.copy()

            if mask is not None and not mask[i + i_off, j + j_off]:
                # skip sampling of this pixel
                continue

            # retry up to 10 times if the MCGSM yields NaNs
            for _ in range(10):
                # sample MCGSM
                outputs = self.mcgsm.sample(hiddens[-1][i, j].reshape(-1, self.num_hiddens).T)
                if not any(isnan(outputs)):
                    break
                print "Warning: NaNs detected."

            if return_loglik:
                # accumulate the log-density of the drawn sample
                logq += self.mcgsm.loglikelihood(hiddens[-1][i, j].reshape(-1, self.num_hiddens).T, outputs)

            outputs = outputs.T.reshape(outputs.shape[1], 1, 1, outputs.shape[0])

            if self.preconditioner:
                # map whitened samples back to pixel space
                inputs, outputs = self._precondition_inverse(inputs, outputs)

            # clip to the optional bounds
            if max_values is not None:
                outputs[outputs > max_values] = max_values[outputs > max_values]
            if min_values is not None:
                outputs[outputs < min_values] = min_values[outputs < min_values]

            # insert sampled pixels into images (writes into a view of `images`)
            if self.output_mask.ndim > 2:
                images[:, i + i_off, j + j_off][:, self.output_mask[i_off, j_off]] = outputs
            else:
                images[:, i + i_off, j + j_off] = outputs

    # restore the caller's original shape
    images = images.reshape(*shape)

    if return_loglik:
        return images, logq

    return images