def create_fixed_gen(self, subset):
    if subset == "train":
        y = self.y_train
        image_shapes = self.info_train
    elif subset == "valid":
        y = self.y_valid
        image_shapes = self.info_valid
    elif subset == "test":
        y = self.y_test
        image_shapes = self.info_test
    else:
        raise ValueError("unknown subset: %s" % subset)

    num_batches = int(np.ceil(float(y.shape[0]) / self.chunk_size))

    def fixed_gen():
        for i in range(num_batches):
            if i == num_batches - 1:
                # last chunk: zero-pad to chunk_size and report the true length
                chunk_x1 = np.zeros((self.chunk_size, y.shape[1]), dtype=np.float32)
                chunk_x2 = np.zeros((self.chunk_size, image_shapes.shape[1]), dtype=np.float32)
                chunk_length = y.shape[0] - (num_batches - 1) * self.chunk_size
                chunk_x1[:chunk_length] = y[i * self.chunk_size:]
                chunk_x2[:chunk_length] = image_shapes[i * self.chunk_size:]
            else:
                chunk_x1 = y[i * self.chunk_size:(i + 1) * self.chunk_size]
                chunk_x2 = image_shapes[i * self.chunk_size:(i + 1) * self.chunk_size]
                chunk_length = self.chunk_size
            yield [chunk_x1, chunk_x2], chunk_length

    return buffering.buffered_gen_threaded(fixed_gen())
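# Every generator in this section is wrapped in buffering.buffered_gen_threaded so that
# data loading and augmentation overlap with GPU compute. Its implementation is not part
# of this section; the sketch below is only an assumption of how such a wrapper could be
# written, matching the (generator, buffer_size=...) call signature used here.
try:
    import queue               # Python 3
except ImportError:
    import Queue as queue      # Python 2
import threading

def buffered_gen_threaded_sketch(source_gen, buffer_size=2):
    """Run `source_gen` in a background thread, keeping up to buffer_size chunks ready."""
    buf = queue.Queue(maxsize=max(1, buffer_size - 1))
    end_marker = object()      # sentinel: the source generator is exhausted

    def producer():
        for chunk in source_gen:
            buf.put(chunk, block=True)   # blocks while the buffer is full
        buf.put(end_marker)

    thread = threading.Thread(target=producer)
    thread.daemon = True       # do not keep the process alive just for the buffer
    thread.start()

    while True:
        chunk = buf.get()
        if chunk is end_marker:
            return
        yield chunk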
def get_preds_targs_tta_feat(data_iterator, prelabel=''):
    print('Data')
    print('n', sys.argv[2], ': %d' % data_iterator.nsamples)

    for n, (x_chunk, y_chunk, id_chunk) in enumerate(
            buffering.buffered_gen_threaded(data_iterator.generate())):
        # load chunk to GPU
        inputs = Variable(torch.from_numpy(x_chunk).cuda(), volatile=True)
        labels = Variable(torch.from_numpy(y_chunk).cuda(), volatile=True)

        predictions = model.l_out(inputs, feat=True)
        predictions = predictions.cpu().data.numpy()

        # one feature file per TTA view of this sample
        for i in range(predictions.shape[0]):
            with open(os.path.join(outputs_path, prelabel + str(id_chunk) + "_" + str(i) + ".npy"), "wb") as f:
                np.save(f, predictions[i])

        if n % 1000 == 0:
            print(n, 'batches processed')
def create_random_gen(self, *args): # we ignore the args train_chunk_size = int(round(self.chunk_size * self.train_sample_weight)) pseudo_chunk_size = self.chunk_size - train_chunk_size train_gen = data.multiscale_patches_gen_augmented(self.images_train, self.labels_train, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=train_chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) pseudo_gen = data.multiscale_patches_gen_augmented(self.images_pseudo, self.labels_pseudo, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=pseudo_chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) def random_gen(): indices = np.arange(self.chunk_size) for a, b in itertools.izip(train_gen, pseudo_gen): (chunk_x1, chunk_y1, chunk_shape), (chunk_x2, chunk_y2, _) = a, b np.random.shuffle(indices) chunk_y = np.concatenate((chunk_y1, chunk_y2), 0)[indices] chunk_x = [] for k in xrange(len(chunk_x1)): chunk_x += [np.concatenate((chunk_x1[k], chunk_x2[k]), 0)[indices]] chunk_x[k] -= self.zmuv_means[k] chunk_x[k] /= self.zmuv_stds[k] chunk_x[k] = chunk_x[k][:, None, :, :] yield chunk_x, chunk_y return buffering.buffered_gen_threaded(random_gen())
def create_random_gen(self):
    def random_gen():
        for i in range(self.num_chunks_train):
            # sample chunk_size training rows with replacement
            indices = np.random.randint(self.y_train.shape[0], size=self.chunk_size)
            yield [self.y_train[indices], self.info_train[indices]], self.labels_train[indices]

    return buffering.buffered_gen_threaded(random_gen())
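# A typical consumer of the generator pair above (illustrative only; the concrete loader
# object and the training step live elsewhere in the repo): create_random_gen yields
# num_chunks_train randomly sampled chunks, create_fixed_gen walks a subset exactly once.
def _example_train_loop(data_loader):
    for xs_chunk, y_chunk in data_loader.create_random_gen():
        # xs_chunk is the [y, image_shapes] input pair built above, y_chunk the labels
        pass  # copy the chunk to shared GPU variables and run the training iterations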
def get_preds_targs(data_iterator):
    print 'Data'
    print 'n', sys.argv[2], ': %d' % data_iterator.nsamples

    validation_losses = []
    preds = []
    targs = []
    ids = []

    for n, (x_chunk, y_chunk, id_chunk) in enumerate(buffering.buffered_gen_threaded(data_iterator.generate())):
        # load chunk to GPU
        x_shared.set_value(x_chunk)
        y_shared.set_value(y_chunk)
        loss, predictions = iter_get()
        validation_losses.append(loss)
        targs.append(y_chunk)
        ids.append(id_chunk)

        if feat:
            # dump per-image feature vectors; npz is a binary format, so open in 'wb'
            for idx, img_id in enumerate(id_chunk):
                np.savez(open(outputs_path + '/' + str(img_id) + '.npz', 'wb'), features=predictions[idx])

        preds.append(predictions)
        if n % 50 == 0:
            print n, 'batches processed'

    preds = np.concatenate(preds)
    targs = np.concatenate(targs)
    ids = np.concatenate(ids)
    print 'Validation loss', np.mean(validation_losses)
    return preds, targs, ids
def _test_data_generator():
    # testing data iterator
    p_transform = {'patch_size': (256, 256), 'channels': 4, 'n_labels': 17}
    rng = np.random.RandomState(42)

    def data_prep_fun(x):
        x = np.array(x)
        x = np.swapaxes(x, 0, 2)
        return x

    def label_prep_fun(labels):
        return labels

    folds = app.make_stratified_split(no_folds=5)
    all_ids = folds[0] + folds[1] + folds[2] + folds[3] + folds[4]
    bad_ids = [18772, 28173, 5023]
    img_ids = [x for x in all_ids if x not in bad_ids]

    dg = DataGenerator(dataset='train-jpg',
                       batch_size=10,
                       img_ids=img_ids,
                       p_transform=p_transform,
                       data_prep_fun=data_prep_fun,
                       label_prep_fun=label_prep_fun,
                       rng=rng,
                       full_batch=True, random=False, infinite=False)

    for (x_chunk, y_chunk, id_train) in buffering.buffered_gen_threaded(dg.generate()):
        print x_chunk.shape, y_chunk.shape, id_train
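# data_prep_fun above moves the channel axis to the front with np.swapaxes(x, 0, 2).
# Note that this also swaps height and width ((H, W, C) -> (C, W, H)); for the square
# 256x256 patches used here the two layouts coincide. Small worked example:
import numpy as np

hwc = np.zeros((256, 256, 4), dtype=np.uint8)       # image as loaded: H x W x C
chw = np.swapaxes(hwc, 0, 2)                        # -> (4, 256, 256)
assert chw.shape == (4, 256, 256)
chw_strict = np.transpose(hwc, (2, 0, 1))           # (C, H, W) without the H/W swap
assert chw_strict.shape == (4, 256, 256)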
def create_random_gen(self, images, labels):
    gen = data.rescaled_patches_gen_augmented(images, labels, self.estimate_scale,
                                              patch_size=self.patch_size,
                                              chunk_size=self.chunk_size,
                                              num_chunks=self.num_chunks_train,
                                              augmentation_params=self.augmentation_params)

    def random_gen():
        for chunk_x, chunk_y, chunk_shape in gen:
            yield [chunk_x[:, None, :, :]], chunk_y

    return buffering.buffered_gen_threaded(random_gen())
def create_fixed_gen(self, images, augment=False):
    augmentation_transforms = self.augmentation_transforms_test if augment else None
    gen = data.rescaled_patches_gen_fixed(images, self.estimate_scale,
                                          patch_size=self.patch_size,
                                          chunk_size=self.chunk_size,
                                          augmentation_transforms=augmentation_transforms)

    def fixed_gen():
        for chunk_x, chunk_shape, chunk_length in gen:
            yield [chunk_x[:, None, :, :]], chunk_length

    return buffering.buffered_gen_threaded(fixed_gen())
def get_preds_targs_tta(data_iterator, aggregation="mean", threshold=0.5):
    print 'Data'
    print 'n', sys.argv[2], ': %d' % data_iterator.nsamples

    preds = []
    targs = []
    ids = []

    for n, (x_chunk, y_chunk, id_chunk) in enumerate(
            buffering.buffered_gen_threaded(data_iterator.generate())):
        # load chunk to GPU
        inputs, labels = Variable(torch.from_numpy(x_chunk).cuda(), volatile=True), \
                         Variable(torch.from_numpy(y_chunk).cuda(), volatile=True)

        predictions = model.l_out(inputs)
        predictions = predictions.cpu().data.numpy()

        if aggregation == "majority":
            # per-label vote over the TTA views
            final_prediction = np.zeros((predictions.shape[1],))
            for dim in range(predictions.shape[1]):
                count = np.bincount(predictions[:, dim] > threshold, minlength=2)
                final_prediction[dim] = 1 if count[1] >= predictions.shape[0] / 2.0 else 0
        elif aggregation == "mean":
            final_prediction = np.mean(predictions, axis=0)

        targs.append(y_chunk[0])
        ids.append(id_chunk)
        preds.append(final_prediction)
        if n % 1000 == 0:
            print n, 'batches processed'

    preds = np.stack(preds)
    targs = np.stack(targs)
    ids = np.stack(ids)
    print preds.shape
    print targs.shape
    print ids.shape
    return preds, targs, ids
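# The aggregation step in get_preds_targs_tta above, pulled out as a standalone helper
# for clarity. `predictions` is (n_tta_views, n_labels); the function name is
# illustrative and does not exist in the repo.
import numpy as np

def aggregate_tta(predictions, aggregation="mean", threshold=0.5):
    if aggregation == "mean":
        return np.mean(predictions, axis=0)
    elif aggregation == "majority":
        # a label is positive if at least half of the TTA views exceed the threshold
        votes = (predictions > threshold).sum(axis=0)
        return (votes >= predictions.shape[0] / 2.0).astype(np.float32)
    raise ValueError("unknown aggregation: %s" % aggregation)

# e.g. three TTA views over two labels:
# aggregate_tta(np.array([[0.9, 0.1], [0.8, 0.6], [0.2, 0.4]]), "majority") -> [1., 0.]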
def create_random_gen(self, images, labels): gen = data.multiscale_patches_gen_augmented(images, labels, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=self.chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) def random_gen(): for chunks_x, chunk_y, chunk_shape in gen: for k in xrange(len(chunks_x)): chunks_x[k] -= self.zmuv_means[k] chunks_x[k] /= self.zmuv_stds[k] chunks_x[k] = chunks_x[k][:, None, :, :] yield chunks_x, chunk_y return buffering.buffered_gen_threaded(random_gen())
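# zmuv_means / zmuv_stds above are per-scale zero-mean-unit-variance constants. Their
# computation is not shown in this section; one plausible way to estimate them (an
# assumption, not the repo's exact procedure) is to average over a few chunks taken from
# the un-normalised multiscale generator, i.e. data.multiscale_patches_gen_augmented
# before the normalisation in random_gen is applied:
import numpy as np

def estimate_zmuv_stats(gen, num_chunks=10):
    """Return one (mean, std) pair per patch scale, estimated from `num_chunks` chunks."""
    means, stds = [], []
    collected = None
    for _, (chunks_x, _chunk_y, _chunk_shape) in zip(range(num_chunks), gen):
        if collected is None:
            collected = [[] for _ in chunks_x]
        for k, cx in enumerate(chunks_x):
            collected[k].append(np.asarray(cx, dtype=np.float64))
    for per_scale in collected:
        stacked = np.concatenate(per_scale, axis=0)
        means.append(stacked.mean())
        stds.append(stacked.std())
    return means, stds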
def create_fixed_gen(self, images, augment=False): augmentation_transforms = self.augmentation_transforms_test if augment else None gen = data.rescaled_patches_gen_fixed( images, self.estimate_scale, patch_size=self.patch_size, chunk_size=self.chunk_size, augmentation_transforms=augmentation_transforms) def fixed_gen(): for chunk_x, chunk_shape, chunk_length in gen: yield [chunk_x[:, None, :, :]], chunk_length return buffering.buffered_gen_threaded(fixed_gen())
def create_random_gen(self, images, labels): gen = data.rescaled_patches_gen_augmented( images, labels, self.estimate_scale, patch_size=self.patch_size, chunk_size=self.chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) def random_gen(): for chunk_x, chunk_y, chunk_shape in gen: yield [chunk_x[:, None, :, :]], chunk_y return buffering.buffered_gen_threaded(random_gen())
def create_fixed_gen(self, images, augment=False): augmentation_transforms = self.augmentation_transforms_test if augment else None gen = data.multiscale_patches_gen_fixed(images, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=self.chunk_size, augmentation_transforms=augmentation_transforms) def fixed_gen(): for chunks_x, chunk_shape, chunk_length in gen: for k in xrange(len(chunks_x)): chunks_x[k] -= self.zmuv_means[k] chunks_x[k] /= self.zmuv_stds[k] chunks_x[k] = chunks_x[k][:, None, :, :] yield chunks_x, chunk_length return buffering.buffered_gen_threaded(fixed_gen())
def create_random_gen(self, *args): # we ignore the args train_chunk_size = int( round(self.chunk_size * self.train_sample_weight)) pseudo_chunk_size = self.chunk_size - train_chunk_size train_gen = data.multiscale_patches_gen_augmented( self.images_train, self.labels_train, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=train_chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) pseudo_gen = data.multiscale_patches_gen_augmented( self.images_pseudo, self.labels_pseudo, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=pseudo_chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) def random_gen(): indices = np.arange(self.chunk_size) for a, b in zip(train_gen, pseudo_gen): (chunk_x1, chunk_y1, chunk_shape), (chunk_x2, chunk_y2, _) = a, b np.random.shuffle(indices) chunk_y = np.concatenate((chunk_y1, chunk_y2), 0)[indices] chunk_x = [] for k in range(len(chunk_x1)): chunk_x += [ np.concatenate((chunk_x1[k], chunk_x2[k]), 0)[indices] ] chunk_x[k] -= self.zmuv_means[k] chunk_x[k] /= self.zmuv_stds[k] chunk_x[k] = chunk_x[k][:, None, :, :] yield chunk_x, chunk_y return buffering.buffered_gen_threaded(random_gen())
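# The real/pseudo-label mix above is controlled by train_sample_weight: each chunk holds
# round(chunk_size * train_sample_weight) real training patches and the remainder
# pseudo-labelled ones. For example:
chunk_size = 128
train_sample_weight = 0.75
train_chunk_size = int(round(chunk_size * train_sample_weight))   # 96 real samples
pseudo_chunk_size = chunk_size - train_chunk_size                 # 32 pseudo-labelled samples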
def create_fixed_gen(self, images, augment=False): augmentation_transforms = self.augmentation_transforms_test if augment else None gen = data.multiscale_patches_gen_fixed( images, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=self.chunk_size, augmentation_transforms=augmentation_transforms) def fixed_gen(): for chunks_x, chunk_shape, chunk_length in gen: for k in range(len(chunks_x)): chunks_x[k] -= self.zmuv_means[k] chunks_x[k] /= self.zmuv_stds[k] chunks_x[k] = chunks_x[k][:, None, :, :] yield chunks_x, chunk_length return buffering.buffered_gen_threaded(fixed_gen())
def create_random_gen(self, images, labels): gen = data.multiscale_patches_gen_augmented( images, labels, self.scale_factors, patch_sizes=self.patch_sizes, chunk_size=self.chunk_size, num_chunks=self.num_chunks_train, augmentation_params=self.augmentation_params) def random_gen(): for chunks_x, chunk_y, chunk_shape in gen: for k in range(len(chunks_x)): chunks_x[k] -= self.zmuv_means[k] chunks_x[k] /= self.zmuv_stds[k] chunks_x[k] = chunks_x[k][:, None, :, :] yield chunks_x, chunk_y return buffering.buffered_gen_threaded(random_gen())
def get_preds_targs(data_iterator):
    print('Data')
    print('n', sys.argv[2], ': %d' % data_iterator.nsamples)

    validation_losses = []
    preds = []
    targs = []
    ids = []

    for n, (x_chunk, y_chunk, id_chunk) in enumerate(
            buffering.buffered_gen_threaded(data_iterator.generate())):
        inputs, labels = Variable(torch.from_numpy(x_chunk).cuda(), volatile=True), \
                         Variable(torch.from_numpy(y_chunk).cuda(), volatile=True)

        predictions = model.l_out(inputs)
        loss = criterion(predictions, labels)
        validation_losses.append(loss.cpu().data.numpy()[0])

        # move predictions to numpy once, then reuse for saving and stacking
        predictions = predictions.cpu().data.numpy()
        targs.append(y_chunk)
        ids.append(id_chunk)

        if feat:
            # npz is a binary format, so open the file in 'wb'
            for idx, img_id in enumerate(id_chunk):
                np.savez(open(outputs_path + '/' + str(img_id) + '.npz', 'wb'),
                         features=predictions[idx])

        preds.append(predictions)
        if n % 50 == 0:
            print(n, 'batches processed')

    preds = np.concatenate(preds)
    targs = np.concatenate(targs)
    ids = np.stack(ids)
    print('Validation loss', np.mean(validation_losses))
    return preds, targs, ids
def get_preds_targs_tta(data_iterator):
    print 'Data'
    print 'n', sys.argv[2], ': %d' % data_iterator.nsamples

    preds = []
    targs = []
    ids = []

    for n, (x_chunk, y_chunk, id_chunk) in enumerate(buffering.buffered_gen_threaded(data_iterator.generate())):
        # load chunk to GPU
        x_shared.set_value(x_chunk)
        y_shared.set_value(y_chunk)
        loss, predictions = iter_get()
        final_prediction = np.mean(predictions, axis=0)

        targs.append(y_chunk[0])
        ids.append(id_chunk)
        preds.append(final_prediction)
        if n % 1000 == 0:
            print n, 'batches processed'

    preds = np.stack(preds)
    targs = np.stack(targs)
    ids = np.stack(ids)
    print preds.shape
    print targs.shape
    print ids.shape
    return preds, targs, ids
def create_fixed_gen(self, subset):
    if subset == "train":
        y = self.y_train
        image_shapes = self.image_shapes_train
    elif subset == "valid":
        y = self.y_valid
        image_shapes = self.image_shapes_valid
    elif subset == "test":
        y = self.y_test
        image_shapes = self.image_shapes_test
    else:
        raise ValueError("unknown subset: %s" % subset)

    num_batches = int(np.ceil(float(y.shape[0]) / self.chunk_size))

    def fixed_gen():
        for i in range(num_batches):
            if i == num_batches - 1:
                # last chunk: zero-pad to chunk_size and report the true length
                chunk_x1 = np.zeros((self.chunk_size, y.shape[1]), dtype=np.float32)
                chunk_x2 = np.zeros((self.chunk_size, image_shapes.shape[1]), dtype=np.float32)
                chunk_length = y.shape[0] - (num_batches - 1) * self.chunk_size
                chunk_x1[:chunk_length] = y[i * self.chunk_size:]
                chunk_x2[:chunk_length] = image_shapes[i * self.chunk_size:]
            else:
                chunk_x1 = y[i * self.chunk_size:(i + 1) * self.chunk_size]
                chunk_x2 = image_shapes[i * self.chunk_size:(i + 1) * self.chunk_size]
                chunk_length = self.chunk_size
            yield [chunk_x1, chunk_x2], chunk_length

    return buffering.buffered_gen_threaded(fixed_gen())
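# Both create_fixed_gen variants above zero-pad the final chunk to a full chunk_size and
# report the number of real rows as chunk_length, so a consumer trims its predictions
# before concatenating. Hypothetical sketch (predict_fn stands in for the compiled
# prediction function; it is not a name from this repo):
import numpy as np

def _example_collect_predictions(dataset, predict_fn, subset="valid"):
    all_preds = []
    for (chunk_x1, chunk_x2), chunk_length in dataset.create_fixed_gen(subset):
        chunk_preds = predict_fn(chunk_x1, chunk_x2)
        all_preds.append(chunk_preds[:chunk_length])   # drop the zero-padded tail
    return np.concatenate(all_preds, axis=0)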
def get_batch(self, batch_size=32, shuffle=False, rng_seed=None, buffer_size=2,
              dtype=np.float32, chw_order=False):
    """Buffered generator. Returns minibatches of the dataset (X, y) with real-time
    augmentations applied on the fly. If y was not provided, get_batch only returns
    minibatches of X.

    Parameters
    ----------
    batch_size: int, default=32
        Size of the minibatches to extract from X. If len(X) % batch_size != 0,
        the last batch returned contains the remainder, len(X) % batch_size.
    shuffle: bool, default=False
        Whether to shuffle X and y before generating minibatches.
    rng_seed: int, default=None
        Seed for the random state that shuffles X, y (if `shuffle=True`).
    buffer_size: int, default=2
        Number of minibatches to keep loaded in the buffer.
    dtype: np.dtype, default=np.float32
        Data type of the minibatches to be returned.
    chw_order: bool, default=False
        Axis order of the returned minibatch. If False, minibatches have shape
        (batch_size, height, width, channel); if True, they have shape
        (batch_size, channel, height, width).

    Yields
    ------
    ret: tuple OR ndarray
        If y is None (supplied at initialization of the generator), yields a
        minibatch of X with shape depending on `chw_order`. If y was supplied,
        yields a tuple (mb_x, mb_y), where mb_x is a minibatch of X and mb_y a
        minibatch of y, with shape depending on `chw_order`.
    """
    ndata = len(self.X)

    # set random state for shuffling the data, if supplied
    if rng_seed is None:
        rng = np.random
    else:
        rng = np.random.RandomState(seed=rng_seed)

    # index used to iterate through X, y
    idxs = range(ndata)
    if shuffle:
        rng.shuffle(idxs)

    # set up generator with buffer
    def gen_batch():
        # generate batches
        nb_batch = int(np.ceil(float(ndata) / batch_size))
        for b in range(nb_batch):
            # determine batch size; all batches equal batch_size except possibly
            # the last one, when len(X) % batch_size != 0
            batch_end = (b + 1) * batch_size
            if batch_end > ndata:
                nb_samples = ndata - b * batch_size
            else:
                nb_samples = batch_size

            # build a minibatch
            bX = []
            for i in xrange(nb_samples):
                idx = idxs[(b * batch_size) + i]
                x = np.array(self.data_loader(self.X[idx], **self.dl_kwargs),
                             dtype=np.float32)

                # apply actions: zmuv, static_aug, rng_aug, etc.
                x = self.standardize(x)
                bX.append(x)
            bX = np.array(bX, dtype=dtype)

            # do batch zmuv
            if self.batch_zmuv:
                bX = bX - bX.mean(axis=self.batch_axis)
                bX = bX / (bX.std(axis=self.batch_axis) + 1e-12)

            if chw_order:
                bX = bX.transpose(0, 3, 1, 2)

            if self.y is not None:
                bslice = idxs[b * batch_size: b * batch_size + nb_samples]
                yield bX, self.y[bslice]
            else:
                yield bX

    return dtb.buffered_gen_threaded(gen_batch(), buffer_size=buffer_size)
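# Example use of get_batch above. The owning class and its constructor are not shown in
# this section, so the names below (AugmentedBatchGenerator, load_image, train_step) are
# hypothetical; only the get_batch call mirrors the signature defined above.
#
#   gen = AugmentedBatchGenerator(X=image_paths, y=labels, data_loader=load_image)
#   for mb_x, mb_y in gen.get_batch(batch_size=64, shuffle=True, rng_seed=42,
#                                   chw_order=True):
#       train_step(mb_x, mb_y)   # mb_x has shape (64, channels, height, width), float32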
get_predictions_patch = theano.function([], nn.layers.get_output( model.l_out, deterministic=True), givens=givens, on_unused_input='ignore') data_iterator = config().data_iterators[data_iterator_part] print print 'Data' print 'n samples: %d' % data_iterator.nsamples start_time = time.time() for n, (x, lung_mask, tf_matrix, pid) in enumerate( buffering.buffered_gen_threaded(data_iterator.generate(), buffer_size=2)): print '-------------------------------------' print n, pid predictions_scan = np.zeros( (1, 1, n_windows * stride, n_windows * stride, n_windows * stride)) for iz in xrange(n_windows): for iy in xrange(n_windows): for ix in xrange(n_windows): start_time_patch = time.time() x_shared.set_value(x[:, :, iz * stride:(iz * stride) + window_size, iy * stride:(iy * stride) + window_size, ix * stride:(ix * stride) + window_size]) predictions_patch = get_predictions_patch()
nn.layers.set_all_param_values(model.l_out, metadata['param_values']) # theano functions iter_test = theano.function([model.l_in.input_var], nn.layers.get_output(model.l_out, deterministic=True)) if set == 'test': pid2label = utils_lung.read_test_labels(pathfinder.TEST_LABELS_PATH) data_iterator = config().test_data_iterator print print 'Data' print 'n test: %d' % data_iterator.nsamples pid2prediction = {} for i, (x_test, _, id_test) in enumerate(buffering.buffered_gen_threaded( data_iterator.generate())): predictions = iter_test(x_test) pid = id_test[0] print predictions pid2prediction[pid] = predictions[1] if predictions.shape[-1] == 2 else predictions[0] print i, pid, predictions, pid2label[pid] utils.save_pkl(pid2prediction, output_pkl_file) print 'Saved validation predictions into pkl', os.path.basename(output_pkl_file) test_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label) print 'Test loss', test_loss utils_lung.write_submission(pid2prediction, output_csv_file) print 'Saved predictions into csv' loss = evaluate_submission.leaderboard_performance(output_csv_file)
def train_model(expid): metadata_path = MODEL_PATH + "%s.pkl" % expid if theano.config.optimizer != "fast_run": print "WARNING: not running in fast mode!" data_loader.filter_patient_folders() print "Build model" interface_layers = config().build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer( incomings=output_layers.values() ) all_layers = lasagne.layers.get_all_layers(top_layer) all_params = lasagne.layers.get_all_params(top_layer, trainable=True) if "cutoff_gradients" in interface_layers: submodel_params = [param for value in interface_layers["cutoff_gradients"] for param in lasagne.layers.get_all_params(value)] all_params = [p for p in all_params if p not in submodel_params] if "pretrained" in interface_layers: for config_name, layers_dict in interface_layers["pretrained"].iteritems(): pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1] pretrained_resume_metadata = np.load(pretrained_metadata_path) pretrained_top_layer = lasagne.layers.MergeLayer( incomings = layers_dict.values() ) lasagne.layers.set_all_param_values(pretrained_top_layer, pretrained_resume_metadata['param_values']) num_params = sum([np.prod(p.get_value().shape) for p in all_params]) print string.ljust(" layer output shapes:",36), print string.ljust("#params:",10), print string.ljust("#data:",10), print "output shape:" for layer in all_layers[:-1]: name = string.ljust(layer.__class__.__name__, 32) num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()]) num_param = string.ljust(int(num_param).__str__(), 10) num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10) print " %s %s %s %s" % (name, num_param, num_size, layer.output_shape) print " number of parameters: %d" % num_params obj = config().build_objective(interface_layers) train_loss_theano = obj.get_loss() kaggle_loss_theano = obj.get_kaggle_loss() segmentation_loss_theano = obj.get_segmentation_loss() validation_other_losses = collections.OrderedDict() validation_train_loss = obj.get_loss(average=False, deterministic=True, validation=True, other_losses=validation_other_losses) validation_kaggle_loss = obj.get_kaggle_loss(average=False, deterministic=True, validation=True) validation_segmentation_loss = obj.get_segmentation_loss(average=False, deterministic=True, validation=True) xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems() } # contains target_vars of the objective! Not the output layers desired values! # There can be more output layers than are strictly required for the objective # e.g. 
for debugging ys_shared = { key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32') for (key, target_var) in obj.target_vars.iteritems() } learning_rate_schedule = config().learning_rate_schedule learning_rate = theano.shared(np.float32(learning_rate_schedule[0])) idx = T.lscalar('idx') givens = dict() for key in obj.target_vars.keys(): if key=="segmentation": givens[obj.target_vars[key]] = ys_shared[key][idx*config().sunny_batch_size : (idx+1)*config().sunny_batch_size] else: givens[obj.target_vars[key]] = ys_shared[key][idx*config().batch_size : (idx+1)*config().batch_size] for key in input_layers.keys(): if key=="sunny": givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size] else: givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size] updates = config().build_updates(train_loss_theano, all_params, learning_rate) #grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)])) #theano_printer.print_me_this("Grad norm", grad_norm) iter_train = theano.function([idx], [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", updates=updates, # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) iter_validate = theano.function([idx], [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss] + [v for _, v in validation_other_losses.items()] + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore") num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS / (config().batch_size * config().batches_per_chunk)) print "Will train for %d chunks" % num_chunks_train if config().restart_from_save and os.path.isfile(metadata_path): print "Load model parameters for resuming" resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) start_chunk_idx = resume_metadata['chunks_since_start'] + 1 chunks_train_idcs = range(start_chunk_idx, num_chunks_train) # set lr to the correct value current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx)) print " setting learning rate to %.7f" % current_lr learning_rate.set_value(current_lr) losses_train = resume_metadata['losses_train'] losses_eval_valid = resume_metadata['losses_eval_valid'] losses_eval_train = resume_metadata['losses_eval_train'] losses_eval_valid_kaggle = [] #resume_metadata['losses_eval_valid_kaggle'] losses_eval_train_kaggle = [] #resume_metadata['losses_eval_train_kaggle'] else: chunks_train_idcs = range(num_chunks_train) losses_train = [] losses_eval_valid = [] losses_eval_train = [] losses_eval_valid_kaggle = [] losses_eval_train_kaggle = [] create_train_gen = partial(config().create_train_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys()# + ["patients"], ) create_eval_valid_gen = partial(config().create_eval_valid_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys()# + ["patients"] ) create_eval_train_gen = partial(config().create_eval_train_gen, required_input_keys = xs_shared.keys(), required_output_keys = ys_shared.keys() ) print "Train model" start_time = time.time() prev_time = start_time num_batches_chunk = config().batches_per_chunk for e, train_data in izip(chunks_train_idcs, buffering.buffered_gen_threaded(create_train_gen())): print "Chunk %d/%d" 
% (e + 1, num_chunks_train) epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e+1) / NUM_TRAIN_PATIENTS) print " Epoch %.1f" % epoch for key, rate in learning_rate_schedule.iteritems(): if epoch >= key: lr = np.float32(rate) learning_rate.set_value(lr) print " learning rate %.7f" % lr if config().dump_network_loaded_data: pickle.dump(train_data, open("data_loader_dump_train_%d.pkl"%e, "wb")) for key in xs_shared: xs_shared[key].set_value(train_data["input"][key]) for key in ys_shared: ys_shared[key].set_value(train_data["output"][key]) #print "train:", sorted(train_data["output"]["patients"]) losses = [] kaggle_losses = [] segmentation_losses = [] for b in xrange(num_batches_chunk): iter_result = iter_train(b) loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3]) utils.detect_nans(loss, xs_shared, ys_shared, all_params) losses.append(loss) kaggle_losses.append(kaggle_loss) segmentation_losses.append(segmentation_loss) mean_train_loss = np.mean(losses) print " mean training loss:\t\t%.6f" % mean_train_loss losses_train.append(mean_train_loss) print " mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses) print " mean segment loss:\t\t%.6f" % np.mean(segmentation_losses) if ((e + 1) % config().validate_every) == 0: print print "Validating" if config().validate_train_set: subsets = ["validation", "train"] gens = [create_eval_valid_gen, create_eval_train_gen] losses_eval = [losses_eval_valid, losses_eval_train] losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle] else: subsets = ["validation"] gens = [create_eval_valid_gen] losses_eval = [losses_eval_valid] losses_kaggle = [losses_eval_valid_kaggle] for subset, create_gen, losses_validation, losses_kgl in zip(subsets, gens, losses_eval, losses_kaggle): vld_losses = [] vld_kaggle_losses = [] vld_segmentation_losses = [] vld_other_losses = {k:[] for k,_ in validation_other_losses.items()} print " %s set (%d samples)" % (subset, get_number_of_validation_samples(set=subset)) for validation_data in buffering.buffered_gen_threaded(create_gen()): num_batches_chunk_eval = config().batches_per_chunk if config().dump_network_loaded_data: pickle.dump(validation_data, open("data_loader_dump_valid_%d.pkl"%e, "wb")) for key in xs_shared: xs_shared[key].set_value(validation_data["input"][key]) for key in ys_shared: ys_shared[key].set_value(validation_data["output"][key]) #print "validate:", validation_data["output"]["patients"] for b in xrange(num_batches_chunk_eval): losses = tuple(iter_validate(b)[:3+len(validation_other_losses)]) loss, kaggle_loss, segmentation_loss = losses[:3] other_losses = losses[3:] vld_losses.extend(loss) vld_kaggle_losses.extend(kaggle_loss) vld_segmentation_losses.extend(segmentation_loss) for k, other_loss in zip(validation_other_losses, other_losses): vld_other_losses[k].extend(other_loss) vld_losses = np.array(vld_losses) vld_kaggle_losses = np.array(vld_kaggle_losses) vld_segmentation_losses = np.array(vld_segmentation_losses) for k in validation_other_losses: vld_other_losses[k] = np.array(vld_other_losses[k]) # now select only the relevant section to average sunny_len = get_lenght_of_set(name="sunny", set=subset) regular_len = get_lenght_of_set(name="regular", set=subset) num_valid_samples = get_number_of_validation_samples(set=subset) #print losses[:num_valid_samples] #print kaggle_losses[:regular_len] #print segmentation_losses[:sunny_len] loss_to_save = obj.compute_average(vld_losses[:num_valid_samples]) print " mean training loss:\t\t%.6f" % loss_to_save print " mean kaggle 
loss:\t\t%.6f" % np.mean(vld_kaggle_losses[:regular_len]) print " mean segment loss:\t\t%.6f" % np.mean(vld_segmentation_losses[:sunny_len]) # print " acc:\t%.2f%%" % (acc * 100) for k, v in vld_other_losses.items(): print " mean %s loss:\t\t%.6f" % (k, obj.compute_average(v[:num_valid_samples], loss_name=k)) print losses_validation.append(loss_to_save) kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len]) losses_kgl.append(kaggle_to_save) now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0])) eta = datetime.now() + timedelta(seconds=est_time_left) eta_str = eta.strftime("%c") print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev) print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str) print if ((e + 1) % config().save_every) == 0: print print "Saving metadata, parameters" with open(metadata_path, 'w') as f: pickle.dump({ 'metadata_path': metadata_path, 'configuration_file': config().__name__, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'chunks_since_start': e, 'losses_train': losses_train, 'losses_eval_train': losses_eval_train, 'losses_eval_train_kaggle': losses_eval_train_kaggle, 'losses_eval_valid': losses_eval_valid, 'losses_eval_valid_kaggle': losses_eval_valid_kaggle, 'time_since_start': time_since_start, 'param_values': lasagne.layers.get_all_param_values(top_layer) }, f, pickle.HIGHEST_PROTOCOL) print " saved to %s" % metadata_path print # store all known outputs from last batch: if config().take_a_dump: all_theano_variables = [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print() for layer in all_layers[:-1]: all_theano_variables.append(lasagne.layers.helper.get_output(layer)) iter_train = theano.function([idx], all_theano_variables, givens=givens, on_unused_input="ignore", updates=updates, # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) train_data["intermediates"] = iter_train(0) pickle.dump(train_data, open(metadata_path + "-dump", "wb")) return
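# The learning_rate_schedule used in the training loop above maps epoch (or chunk)
# thresholds to learning rates: whenever the current position passes a key, the matching
# rate is written into the shared `learning_rate` variable. An illustrative schedule
# (not taken from any specific config in the repo):
learning_rate_schedule_example = {
    0: 1.0e-4,     # initial rate
    150: 5.0e-5,   # first decay step
    250: 1.0e-5,   # final decay step
}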
print 'Data' print 'n train: %d' % train_data_iterator.nsamples print 'n validation: %d' % valid_data_iterator.nsamples print 'n chunks per epoch', config().nchunks_per_epoch print print 'Train model' chunk_idx = 0 start_time = time.time() prev_time = start_time tmp_losses_train = [] losses_train_print = [] print 'Training' for chunk_idx, (x_chunk_train, y_chunk_train, id_train) in izip(range(5), buffering.buffered_gen_threaded( train_data_iterator.generate())): # load chunk to GPU x_shared.set_value(x_chunk_train) y_shared.set_value(y_chunk_train) # make nbatches_chunk iterations loss = iter_train() # print loss, y_chunk_train, id_train tmp_losses_train.append(loss) losses_train_print.append(loss) print 'Validating' for i, (x_chunk_valid, y_chunk_valid, ids_batch) in enumerate( # buffering.buffered_gen_threaded(
nn.layers.get_output(model.l_out), givens=givens_valid) iter_get_mu = theano.function([], nn.layers.get_output(model.l_mu), givens=givens_valid) # valid_data_iterator = config().valid_data_iterator valid_data_iterator = config().train_data_iterator print print 'Data' print 'n validation: %d' % valid_data_iterator.nsamples valid_losses_dice = [] valid_losses_ce = [] for n, (x_chunk, y_chunk, id_chunk) in enumerate( buffering.buffered_gen_threaded(valid_data_iterator.generate())): # load chunk to GPU x_shared.set_value(x_chunk) targets = y_chunk inputs = x_chunk predictions = iter_get_predictions() print 'targets', targets print 'predictions', iter_get_mu() # targets = data_transforms.make_3d_mask_from_annotations(inputs[0, 0].shape, targets, shape='sphere') # z = predictions[0, 0, :] # y = predictions[0, 1, :] # x = predictions[0, 2, :] # pp = z[:, None, None] * y[None, :, None] * x[None, None, :]
print('n validation: %d' % valid_data_iterator.nsamples) print('n chunks per epoch', config().nchunks_per_epoch) print() print('Train model') chunk_idx = 0 start_time = time.time() prev_time = start_time tmp_losses_train = [] losses_train_print = [] print('Training') for chunk_idx, (x_chunk_train, y_chunk_train, id_train) in zip( range(5), buffering.buffered_gen_threaded(train_data_iterator.generate())): # load chunk to GPU x_shared.set_value(x_chunk_train) y_shared.set_value(y_chunk_train) # make nbatches_chunk iterations loss = iter_train() # print(loss), y_chunk_train, id_train tmp_losses_train.append(loss) losses_train_print.append(loss) print('Validating') for i, (x_chunk_valid, y_chunk_valid, ids_batch) in enumerate( # buffering.buffered_gen_threaded(
print print 'Data' print 'n train: %d' % train_data_iterator.nsamples print 'n validation: %d' % valid_data_iterator.nsamples print print 'Train model' chunk_idx = 0 start_time = time.time() prev_time = start_time tmp_losses_train = [] # use buffering.buffered_gen_threaded() for chunk_idx, (xs_chunk, ys_chunk, _) in izip(chunk_idxs, buffering.buffered_gen_threaded(train_data_iterator.generate())): if chunk_idx in learning_rate_schedule: lr = np.float32(learning_rate_schedule[chunk_idx]) print ' setting learning rate to %.7f' % lr print learning_rate.set_value(lr) # load chunk to GPU for x_shared, x in zip(xs_shared, xs_chunk): x_shared.set_value(x) for y_shared, y in zip(ys_shared, ys_chunk): y_shared.set_value(y) # make nbatches_chunk iterations for b in xrange(config().nbatches_chunk): loss = iter_train(b)
givens_in[l_in.input_var] = x iter_test_det = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.l_outs], givens=givens_in, on_unused_input='warn') iter_mu = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.mu_layers], givens=givens_in, on_unused_input='warn') iter_sigma = theano.function([], [nn.layers.get_output(l, deterministic=True) for l in model.sigma_layers], givens=givens_in, on_unused_input='warn') print ' generating predictions for the validation set' valid_data_iterator = config().valid_data_iterator batch_predictions, batch_targets, batch_ids = [], [], [] mu_predictions, sigma_predictions = [], [] for xs_batch_valid, ys_batch_valid, ids_batch in buffering.buffered_gen_threaded( valid_data_iterator.generate()): for x_shared, x in zip(xs_shared, xs_batch_valid): x_shared.set_value(x) batch_targets.append(ys_batch_valid) batch_predictions.append(iter_test_det()) batch_ids.append(ids_batch) mu_predictions.append(iter_mu()) sigma_predictions.append(iter_sigma()) pid2mu = utils_heart.get_patient_normparam_prediction(mu_predictions, batch_ids) pid2sigma = utils_heart.get_patient_normparam_prediction(sigma_predictions, batch_ids) valid_pid2musigma = {} for pid in pid2mu.iterkeys(): valid_pid2musigma[pid] = {'mu': pid2mu[pid], 'sigma': pid2sigma[pid]}
def predict_model(expid, mfile=None): metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile) prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid submission_path = SUBMISSION_PATH + "%s.csv" % expid if theano.config.optimizer != "fast_run": print("WARNING: not running in fast mode!") print("Using") print(" %s" % metadata_path) print("To generate") print(" %s" % prediction_path) print(" %s" % submission_path) print("Build model") interface_layers = config().build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer( incomings=list(output_layers.values())) all_layers = lasagne.layers.get_all_layers(top_layer) num_params = lasagne.layers.count_params(top_layer) print(" number of parameters: %d" % num_params) print(string.ljust(" layer output shapes:", 36), end=' ') print(string.ljust("#params:", 10), end=' ') print("output shape:") for layer in all_layers[:-1]: name = string.ljust(layer.__class__.__name__, 32) num_param = sum( [np.prod(p.get_value().shape) for p in layer.get_params()]) num_param = string.ljust(num_param.__str__(), 10) print(" %s %s %s" % (name, num_param, layer.output_shape)) xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.items() } idx = T.lscalar('idx') givens = dict() for key in list(input_layers.keys()): if key == "sunny": givens[input_layers[key].input_var] = xs_shared[key][idx * config( ).sunny_batch_size:(idx + 1) * config().sunny_batch_size] else: givens[input_layers[key]. input_var] = xs_shared[key][idx * config().batch_size:(idx + 1) * config().batch_size] network_outputs = [ lasagne.layers.helper.get_output(network_output_layer, deterministic=True) for network_output_layer in list(output_layers.values()) ] iter_test = theano.function( [idx], network_outputs + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) print("Load model parameters for resuming") resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) num_batches_chunk = config().batches_per_chunk num_batches = get_number_of_test_batches() num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk))) chunks_train_idcs = list(range(1, num_chunks + 1)) data_loader.filter_patient_folders() create_test_gen = partial( config().create_test_gen, required_input_keys=list(xs_shared.keys()), required_output_keys=[ "patients", "classification_correction_function" ], ) print("Generate predictions with this model") start_time = time.time() prev_time = start_time predictions = [{ "patient": i + 1, "systole": np.zeros((0, 600)), "diastole": np.zeros((0, 600)) } for i in range(NUM_PATIENTS)] for e, test_data in zip(itertools.count(start=1), buffering.buffered_gen_threaded( create_test_gen())): print(" load testing data onto GPU") for key in xs_shared: xs_shared[key].set_value(test_data["input"][key]) patient_ids = test_data["output"]["patients"] classification_correction = test_data["output"][ "classification_correction_function"] print(" patients:", " ".join(map(str, patient_ids))) print(" chunk %d/%d" % (e, num_chunks)) for b in range(num_batches_chunk): iter_result = iter_test(b) network_outputs = tuple(iter_result[:len(output_layers)]) network_outputs_dict = { list(output_layers.keys())[i]: network_outputs[i] for i in range(len(output_layers)) } 
kaggle_systoles, kaggle_diastoles = config().postprocess( network_outputs_dict) kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype( 'float64'), kaggle_diastoles.astype('float64') for idx, patient_id in enumerate( patient_ids[b * config().batch_size:(b + 1) * config().batch_size]): if patient_id != 0: index = patient_id - 1 patient_data = predictions[index] assert patient_id == patient_data["patient"] kaggle_systole = kaggle_systoles[idx:idx + 1, :] kaggle_diastole = kaggle_diastoles[idx:idx + 1, :] assert np.isfinite(kaggle_systole).all() and np.isfinite( kaggle_systole).all() kaggle_systole = classification_correction[ b * config().batch_size + idx](kaggle_systole) kaggle_diastole = classification_correction[ b * config().batch_size + idx](kaggle_diastole) assert np.isfinite(kaggle_systole).all() and np.isfinite( kaggle_systole).all() patient_data["systole"] = np.concatenate( (patient_data["systole"], kaggle_systole), axis=0) patient_data["diastole"] = np.concatenate( (patient_data["diastole"], kaggle_diastole), axis=0) now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now est_time_left = time_since_start * ( float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0])) eta = datetime.now() + timedelta(seconds=est_time_left) eta_str = eta.strftime("%c") print(" %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)) print(" estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)) print() already_printed = False for prediction in predictions: if prediction["systole"].size > 0 and prediction["diastole"].size > 0: average_method = getattr(config(), 'tta_average_method', partial(np.mean, axis=0)) prediction["systole_average"] = average_method( prediction["systole"]) prediction["diastole_average"] = average_method( prediction["diastole"]) try: test_if_valid_distribution(prediction["systole_average"]) test_if_valid_distribution(prediction["diastole_average"]) except: if not already_printed: print("WARNING: These distributions are not distributions") already_printed = True prediction["systole_average"] = make_monotone_distribution( prediction["systole_average"]) prediction["diastole_average"] = make_monotone_distribution( prediction["diastole_average"]) test_if_valid_distribution(prediction["systole_average"]) test_if_valid_distribution(prediction["diastole_average"]) print("Calculating training and validation set scores for reference") validation_dict = {} for patient_ids, set_name in [(validation_patients_indices, "validation"), (train_patients_indices, "train")]: errors = [] for patient in patient_ids: prediction = predictions[patient - 1] if "systole_average" in prediction: assert patient == regular_labels[patient - 1, 0] error = CRSP(prediction["systole_average"], regular_labels[patient - 1, 1]) errors.append(error) error = CRSP(prediction["diastole_average"], regular_labels[patient - 1, 2]) errors.append(error) if len(errors) > 0: errors = np.array(errors) estimated_CRSP = np.mean(errors) print(" %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)) validation_dict[set_name] = estimated_CRSP else: print(" %s kaggle loss: not calculated" % (string.rjust(set_name, 12))) print("dumping prediction file to %s" % prediction_path) with open(prediction_path, 'w') as f: pickle.dump( { 'metadata_path': metadata_path, 'prediction_path': prediction_path, 'submission_path': submission_path, 'configuration_file': config().__name__, 'git_revision_hash': utils.get_git_revision_hash(), 
'experiment_id': expid, 'time_since_start': time_since_start, 'param_values': lasagne.layers.get_all_param_values(top_layer), 'predictions': predictions, 'validation_errors': validation_dict, }, f, pickle.HIGHEST_PROTOCOL) print("prediction file dumped") print("dumping submission file to %s" % submission_path) with open(submission_path, 'w') as csvfile: csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) csvwriter.writerow(['Id'] + ['P%d' % i for i in range(600)]) for prediction in predictions: # the submission only has patients 501 to 700 if prediction["patient"] in data_loader.test_patients_indices: if "diastole_average" not in prediction or "systole_average" not in prediction: raise Exception("Not all test-set patients were predicted") csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + [ "%.18f" % p for p in prediction["diastole_average"].flatten() ]) csvwriter.writerow(["%d_Systole" % prediction["patient"]] + [ "%.18f" % p for p in prediction["systole_average"].flatten() ]) print("submission file dumped") return
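# CRSP(...) above scores a predicted 600-bin cumulative distribution against the scalar
# volume label; it is the CRPS metric of the competition. A minimal sketch of that metric
# (an assumption about the helper's behaviour, not its actual source):
import numpy as np

def crps_sketch(predicted_cdf, true_volume):
    """Mean squared difference between the predicted CDF and the target step function."""
    predicted_cdf = np.asarray(predicted_cdf).flatten()                    # shape (600,)
    target_cdf = (np.arange(predicted_cdf.shape[0]) >= true_volume).astype(np.float64)
    return np.mean((predicted_cdf - target_cdf) ** 2)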
givens[model.l_in.input_var] = x_shared get_predictions_patch = theano.function([], nn.layers.get_output(model.l_out, deterministic=True), givens=givens, on_unused_input='ignore') valid_data_iterator = config().valid_data_iterator print print 'Data' print 'n samples: %d' % valid_data_iterator.nsamples start_time = time.time() for n, (x, y, lung_mask, annotations, tf_matrix, pid) in enumerate( buffering.buffered_gen_threaded(valid_data_iterator.generate(), buffer_size=2)): print '-------------------------------------' print n, pid predictions_scan = np.zeros((1, 1, n_windows * stride, n_windows * stride, n_windows * stride)) for iz in xrange(n_windows): for iy in xrange(n_windows): for ix in xrange(n_windows): start_time_patch = time.time() x_shared.set_value(x[:, :, iz * stride:(iz * stride) + window_size, iy * stride:(iy * stride) + window_size, ix * stride:(ix * stride) + window_size]) predictions_patch = get_predictions_patch() predictions_scan[0, 0,
print() print('Data') print('n train: %d' % train_data_iterator.nsamples) print('n validation: %d' % valid_data_iterator.nsamples) print('n chunks per epoch', config().nchunks_per_epoch) print() print('Train model') chunk_idx = 0 start_time = time.time() prev_time = start_time tmp_losses_train = [] # use buffering.buffered_gen_threaded() for chunk_idx, (x_chunk_train, y_chunk_train, id_train) in zip(chunk_idxs, buffering.buffered_gen_threaded( train_data_iterator.generate())): if chunk_idx in learning_rate_schedule: lr = np.float32(learning_rate_schedule[chunk_idx]) print(' setting learning rate to %.7f' % lr) print() learning_rate.set_value(lr) # load chunk to GPU x_shared.set_value(x_chunk_train) y_shared.set_value(y_chunk_train) # make nbatches_chunk iterations chunk_train_losses = [] for b in range(config().nbatches_chunk): loss = iter_train(b) chunk_train_losses.append(loss)
def predict_slice_model(expid, outfile, mfile=None): metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile) if theano.config.optimizer != "fast_run": print "WARNING: not running in fast mode!" print "Build model" interface_layers = config().build_model() output_layers = interface_layers["outputs"] input_layers = interface_layers["inputs"] top_layer = lasagne.layers.MergeLayer( incomings=output_layers.values() ) _check_slicemodel(input_layers) # Print the architecture _print_architecture(top_layer) xs_shared = { key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems() } idx = T.lscalar('idx') givens = dict() for key in input_layers.keys(): if key=="sunny": givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size] else: givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size] network_outputs = [ lasagne.layers.helper.get_output(network_output_layer, deterministic=True) for network_output_layer in output_layers.values() ] iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(), givens=givens, on_unused_input="ignore", # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) ) print "Load model parameters for resuming" resume_metadata = np.load(metadata_path) lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values']) num_batches_chunk = config().batches_per_chunk num_batches = get_number_of_test_batches() num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk))) chunks_train_idcs = range(1, num_chunks+1) create_test_gen = partial(config().create_test_gen, required_input_keys = xs_shared.keys(), required_output_keys = ["patients", "slices"], ) print "Generate predictions with this model" start_time = time.time() prev_time = start_time predictions = [{"patient": i+1, "slices": { slice_id: { "systole": np.zeros((0,600)), "diastole": np.zeros((0,600)) } for slice_id in data_loader.get_slice_ids_for_patient(i+1) } } for i in xrange(NUM_PATIENTS)] # Loop over data and generate predictions for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())): print " load testing data onto GPU" for key in xs_shared: xs_shared[key].set_value(test_data["input"][key]) patient_ids = test_data["output"]["patients"] slice_ids = test_data["output"]["slices"] print " patients:", " ".join(map(str, patient_ids)) print " chunk %d/%d" % (e, num_chunks) for b in xrange(num_batches_chunk): iter_result = iter_test(b) network_outputs = tuple(iter_result[:len(output_layers)]) network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))} kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict) kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64') for idx, (patient_id, slice_id) in enumerate( zip(patient_ids[b*config().batch_size:(b+1)*config().batch_size], slice_ids[b*config().batch_size:(b+1)*config().batch_size])): if patient_id != 0: index = patient_id-1 patient_data = predictions[index] assert patient_id==patient_data["patient"] patient_slice_data = patient_data["slices"][slice_id] patient_slice_data["systole"] = np.concatenate((patient_slice_data["systole"], kaggle_systoles[idx:idx+1,:]),axis=0) patient_slice_data["diastole"] = np.concatenate((patient_slice_data["diastole"], 
kaggle_diastoles[idx:idx+1,:]),axis=0) now = time.time() time_since_start = now - start_time time_since_prev = now - prev_time prev_time = now est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0])) eta = datetime.now() + timedelta(seconds=est_time_left) eta_str = eta.strftime("%c") print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev) print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str) print # Average predictions already_printed = False for prediction in predictions: for prediction_slice_id in prediction["slices"]: prediction_slice = prediction["slices"][prediction_slice_id] if prediction_slice["systole"].size>0 and prediction_slice["diastole"].size>0: average_method = getattr(config(), 'tta_average_method', partial(np.mean, axis=0)) prediction_slice["systole_average"] = average_method(prediction_slice["systole"]) prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"]) try: test_if_valid_distribution(prediction_slice["systole_average"]) test_if_valid_distribution(prediction_slice["diastole_average"]) except: if not already_printed: print "WARNING: These distributions are not distributions" already_printed = True prediction_slice["systole_average"] = make_monotone_distribution(prediction_slice["systole_average"]) prediction_slice["diastole_average"] = make_monotone_distribution(prediction_slice["diastole_average"]) print "Calculating training and validation set scores for reference" # Add CRPS scores to the predictions # Iterate over train and validation sets for patient_ids, set_name in [(validation_patients_indices, "validation"), (train_patients_indices, "train")]: # Iterate over patients in the set for patient in patient_ids: prediction = predictions[patient-1] # Iterate over the slices for slice_id in prediction["slices"]: prediction_slice = prediction["slices"][slice_id] if "systole_average" in prediction_slice: assert patient == regular_labels[patient-1, 0] error_sys = CRSP(prediction_slice["systole_average"], regular_labels[patient-1, 1]) prediction_slice["systole_CRPS"] = error_sys prediction_slice["target_systole"] = regular_labels[patient-1, 1] error_dia = CRSP(prediction_slice["diastole_average"], regular_labels[patient-1, 2]) prediction_slice["diastole_CRPS"] = error_dia prediction_slice["target_diastole"] = regular_labels[patient-1, 2] prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia print "dumping prediction file to %s" % outfile with open(outfile, 'w') as f: pickle.dump({ 'metadata_path': metadata_path, 'configuration_file': config().__name__, 'git_revision_hash': utils.get_git_revision_hash(), 'experiment_id': expid, 'time_since_start': time_since_start, 'param_values': lasagne.layers.get_all_param_values(top_layer), 'predictions_per_slice': predictions, }, f, pickle.HIGHEST_PROTOCOL) print "prediction file dumped" return
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print " %s" % metadata_path
    print "To generate"
    print " %s" % prediction_path
    print " %s" % submission_path

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print " number of parameters: %d" % num_params
    print string.ljust(" layer output shapes:", 36),
    print string.ljust("#params:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print "    %s %s %s" % (name, num_param, layer.output_shape)

    # shared variables that hold one chunk of test data on the GPU
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()
    for key in input_layers.keys():
        if key == "sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx * config().sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx * config().batch_size:(idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens, on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])

    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))
    chunks_train_idcs = range(1, num_chunks + 1)

    data_loader.filter_patient_folders()

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys=xs_shared.keys(),
                              required_output_keys=["patients", "classification_correction_function"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time

    predictions = [{"patient": i + 1,
                    "systole": np.zeros((0, 600)),
                    "diastole": np.zeros((0, 600))
                    } for i in xrange(NUM_PATIENTS)]

    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print " load testing data onto GPU"
        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"]["classification_correction_function"]
        print " patients:", " ".join(map(str, patient_ids))
        print " chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(patient_ids[b * config().batch_size:(b + 1) * config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[idx:idx + 1, :]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    kaggle_systole = classification_correction[b * config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[b * config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate((patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # average the test-time-augmentation samples and repair invalid CDFs
    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print "Calculating training and validation set scores for reference"
    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print " %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print " %s kaggle loss: not calculated" % (string.rjust(set_name, 12))

    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
            'metadata_path': metadata_path,
            'prediction_path': prediction_path,
            'submission_path': submission_path,
            'configuration_file': config().__name__,
            'git_revision_hash': utils.get_git_revision_hash(),
            'experiment_id': expid,
            'time_since_start': time_since_start,
            'param_values': lasagne.layers.get_all_param_values(top_layer),
            'predictions': predictions,
            'validation_errors': validation_dict,
        }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    print "dumping submission file to %s" % submission_path
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in xrange(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] +
                                   ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] +
                                   ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print "submission file dumped"

    return
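# CRSP, test_if_valid_distribution and make_monotone_distribution are defined
# elsewhere in the repo; a minimal sketch of what they are expected to do,
# assuming the standard Kaggle CRPS over 600 volume bins (the repo's exact
# implementations may differ):
def make_monotone_distribution_sketch(cdf):
    # clip to [0, 1] and enforce a non-decreasing CDF with a running maximum
    return np.maximum.accumulate(np.clip(cdf, 0.0, 1.0))

def test_if_valid_distribution_sketch(cdf):
    assert np.isfinite(cdf).all()
    assert (cdf >= 0.0).all() and (cdf <= 1.0).all()
    assert (np.diff(cdf) >= 0.0).all()  # monotonically non-decreasing

def crps_sketch(predicted_cdf, true_volume):
    # mean squared difference between the predicted CDF and the Heaviside
    # step function located at the true volume
    target_cdf = (np.arange(600) >= true_volume).astype('float64')
    return np.mean((predicted_cdf - target_cdf) ** 2)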
print('n validation: %d' % valid_data_iterator.nsamples)
print('n chunks per epoch', config().nchunks_per_epoch)
print()

print('Train model')
chunk_idx = 0
start_time = time.time()
prev_time = start_time
tmp_losses_train = defaultdict(list)
losses_train_print = defaultdict(list)

# use buffering.buffered_gen_threaded()
for chunk_idx, (x_chunk_train, y_chunk_train, z_chunk_train, id_train) in zip(
        chunk_idxs, buffering.buffered_gen_threaded(train_data_iterator.generate())):
    if chunk_idx in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[chunk_idx])
        print(' setting learning rate to %.7f' % lr)
        print()
        learning_rate.set_value(lr)

    # load chunk to GPU
    x_shared.set_value(x_chunk_train)
    y_shared.set_value(y_chunk_train)
    if config().need_enable:
        z_shared.set_value(z_chunk_train)

    # make nbatches_chunk iterations
    for b in range(config().nbatches_chunk):
        losses = iter_train(b)
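        # (sketch, not the repo's exact bookkeeping) collect the per-batch
        # losses so a running mean can be reported; 'loss' is an illustrative key.
        tmp_losses_train['loss'].append(losses)
        losses_train_print['loss'].append(losses)

    # (sketch) periodic progress report; the interval of 100 chunks is illustrative.
    if (chunk_idx + 1) % 100 == 0:
        print(' chunk %d: running mean training loss %.6f' % (chunk_idx, np.mean(losses_train_print['loss'])))
        losses_train_print = defaultdict(list)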
# theano functions
iter_get_predictions = theano.function([], [valid_loss, nn.layers.get_output(model.l_out, deterministic=True)],
                                       givens=givens_valid)
valid_data_iterator = config().valid_data_iterator

print
print 'Data'
print 'n validation: %d' % valid_data_iterator.nsamples

# confusion-matrix counters for a fixed decision threshold on p(class=1)
threshold = 0.2
n_tp, n_tn, n_fp, n_fn = 0, 0, 0, 0
n_pos = 0
n_neg = 0
validation_losses = []

for n, (x_chunk, y_chunk, id_chunk) in enumerate(buffering.buffered_gen_threaded(valid_data_iterator.generate())):
    # load chunk to GPU
    x_shared.set_value(x_chunk)
    y_shared.set_value(y_chunk)
    loss, predictions = iter_get_predictions()
    validation_losses.append(loss)

    targets = y_chunk[0, 0]
    p1 = predictions[0][1]
    if targets == 1 and p1 >= threshold:
        n_tp += 1
    if targets == 1 and p1 < threshold:
        n_fn += 1
    if targets == 0 and p1 >= threshold:
        n_fp += 1
    if targets == 0 and p1 < threshold:
        n_tn += 1
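# (sketch) summary that typically follows the loop above; sensitivity and
# specificity are derived directly from the four counters:
n_pos = n_tp + n_fn
n_neg = n_fp + n_tn
print 'validation loss: %f' % np.mean(validation_losses)
print 'n_pos: %d, n_neg: %d' % (n_pos, n_neg)
print 'sensitivity (tp / pos): %f' % (float(n_tp) / max(n_pos, 1))
print 'specificity (tn / neg): %f' % (float(n_tn) / max(n_neg, 1))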
print 'Data'
print 'n train: %d' % train_data_iterator.nsamples
print 'n validation: %d' % valid_data_iterator.nsamples
print 'n chunks per epoch', config().nchunks_per_epoch

print
print 'Train model'
chunk_idx = 0
start_time = time.time()
prev_time = start_time
tmp_losses_train = defaultdict(list)
losses_train_print = defaultdict(list)

# use buffering.buffered_gen_threaded()
for chunk_idx, (x_chunk_train, y_chunk_train, z_chunk_train, id_train) in izip(
        chunk_idxs, buffering.buffered_gen_threaded(train_data_iterator.generate())):
    if chunk_idx in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[chunk_idx])
        print ' setting learning rate to %.7f' % lr
        print
        learning_rate.set_value(lr)

    # load chunk to GPU
    x_shared.set_value(x_chunk_train)
    y_shared.set_value(y_chunk_train)
    if config().need_enable:
        z_shared.set_value(z_chunk_train)

    # make nbatches_chunk iterations
    for b in xrange(config().nbatches_chunk):
        losses = iter_train(b)
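# iter_train, the shared variables and the learning rate above are set up
# earlier in the script; a minimal sketch of what that compile step could
# look like in this codebase (l_target, train_loss and build_updates are
# hypothetical names, not the repo's actual ones):
idx = T.lscalar('idx')
givens_train = {
    model.l_in.input_var: x_shared[idx * config().batch_size:(idx + 1) * config().batch_size],
    model.l_target.input_var: y_shared[idx * config().batch_size:(idx + 1) * config().batch_size],
}
updates = config().build_updates(train_loss, model, learning_rate)
iter_train = theano.function([idx], train_loss, givens=givens_train,
                             updates=updates, on_unused_input='ignore')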
if n_tta_iterations > 1:
    valid_data_iterator.transformation_params = config().train_transformation_params
    valid_data_iterator.transformation_params['zoom_range'] = (1., 1.)

print 'valid transformation params'
print valid_data_iterator.transformation_params
print
print 'n valid: %d' % valid_data_iterator.nsamples

batch_predictions, batch_targets, batch_ids = [], [], []
for i in xrange(n_tta_iterations):
    print i,
    sys.stdout.flush()
    for xs_batch_valid, ys_batch_valid, ids_batch in buffering.buffered_gen_threaded(valid_data_iterator.generate()):
        for x_shared, x in zip(xs_shared, xs_batch_valid):
            x_shared.set_value(x)
        batch_targets.append(ys_batch_valid)
        batch_predictions.append(iter_test_det())
        batch_ids.append(ids_batch)

for (systole_predictions, diastole_predictions), patient_ids in zip(batch_predictions, batch_ids):
    for systole_prediction, diastole_prediction, patient_id in zip(systole_predictions, diastole_predictions,
                                                                   patient_ids):
        patient_data = predictions[patient_id - 1]
        assert patient_data['patient'] == patient_id
        patient_data["systole"] = np.concatenate((patient_data["systole"], systole_prediction[None, :]), axis=0)
        patient_data["diastole"] = np.concatenate((patient_data["diastole"], diastole_prediction[None, :]), axis=0)
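# (sketch) the TTA iterations above leave n_tta_iterations rows per patient;
# an arithmetic mean over axis 0 is one way to reduce them to a single
# prediction (the repo's config may use a different averaging method):
for patient_data in predictions:
    if patient_data["systole"].shape[0] > 0:
        patient_data["systole_average"] = np.mean(patient_data["systole"], axis=0)
        patient_data["diastole_average"] = np.mean(patient_data["diastole"], axis=0)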
x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))

givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared

# theano functions
iter_get_predictions = theano.function([], nn.layers.get_output(model.l_out, deterministic=True),
                                       givens=givens_valid)
valid_data_iterator = config().valid_data_iterator

print()
print('Data')
print('n validation: %d' % valid_data_iterator.nsamples)

valid_losses_dice = []
tp = 0
for n, (x_chunk, y_chunk, id_chunk) in enumerate(buffering.buffered_gen_threaded(valid_data_iterator.generate())):
    # load chunk to GPU
    x_shared.set_value(x_chunk)
    predictions = iter_get_predictions()
    targets = y_chunk
    inputs = x_chunk

    # pad the (smaller) network output back to the target shape before scoring
    if predictions.shape != targets.shape:
        pad_width = (np.asarray(targets.shape) - np.asarray(predictions.shape)) // 2
        pad_width = [(p, p) for p in pad_width]
        predictions = np.pad(predictions, pad_width=pad_width, mode='constant')

    dice = utils_lung.dice_index(predictions, targets)
    print(n, id_chunk, dice)
    valid_losses_dice.append(dice)

    if np.sum(predictions * targets) / np.sum(targets) > 0.1:
        tp += 1  # assumed completion: the source snippet is truncated at this point
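# utils_lung.dice_index is defined elsewhere in the repo; a minimal sketch of
# the Dice coefficient it is expected to compute (soft variant, with an
# epsilon to avoid division by zero on empty masks):
def dice_index_sketch(predictions, targets, epsilon=1e-12):
    intersection = np.sum(predictions * targets)
    return (2. * intersection + epsilon) / (np.sum(predictions) + np.sum(targets) + epsilon)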
def get_batch(self, batch_size=32, shuffle=False, rng_seed=None,
              buffer_size=2, dtype=np.float32, chw_order=False):
    """Buffered generator that yields minibatches of the dataset (X, y) with
    real-time augmentations applied on-the-fly. If y was not provided,
    get_batch only yields minibatches of X.

    Parameters
    ----------
    batch_size: int, default=32
        Size of the minibatches to extract from X. If len(X) % batch_size != 0,
        the last batch holds the remaining len(X) % batch_size samples.
    shuffle: bool, default=False
        Whether to shuffle X and y before generating minibatches.
    rng_seed: int, default=None
        Seed for the random state that shuffles X, y (if `shuffle=True`).
    buffer_size: int, default=2
        Number of minibatches to preload into the buffer.
    dtype: np.dtype, default=np.float32
        Data type of the returned minibatches.
    chw_order: bool, default=False
        Axis order of the returned minibatches. If False, minibatches are
        returned with shape (batch_size, height, width, channel). If True,
        minibatches are returned with shape (batch_size, channel, height,
        width).

    Yields
    ------
    ret: tuple OR ndarray
        If y was not supplied at initialization of the generator, yields
        minibatches of X with shape depending on `chw_order`. If y was
        supplied, yields tuples (mb_x, mb_y), where mb_x is a minibatch of X
        and mb_y the corresponding minibatch of y, with shape depending on
        `chw_order`.
    """
    ndata = len(self.X)

    # set random state for shuffling data, if supplied
    if rng_seed is None:
        rng = np.random
    else:
        rng = np.random.RandomState(seed=rng_seed)

    # indices to iterate through X, y
    idxs = range(ndata)
    if shuffle:
        rng.shuffle(idxs)

    # set up generator with buffer
    def gen_batch():
        # generate batches
        nb_batch = int(np.ceil(float(ndata) / batch_size))
        for b in range(nb_batch):
            # determine batch size. all should equal bsize except the
            # last batch, when len(X) % bsize != 0.
            batch_end = (b + 1) * batch_size
            if batch_end > ndata:
                nb_samples = ndata - b * batch_size
            else:
                nb_samples = batch_size

            # get a minibatch
            bX = []
            for i in xrange(nb_samples):
                idx = idxs[(b * batch_size) + i]
                x = np.array(self.data_loader(self.X[idx], **self.dl_kwargs), dtype=np.float32)

                # apply actions: zmuv, static_aug, rng_aug, etc.
                x = self.standardize(x)
                bX.append(x)
            bX = np.array(bX, dtype=dtype)

            # do batch zmuv
            if self.batch_zmuv:
                bX = bX - bX.mean(axis=self.batch_axis)
                bX = bX / (bX.std(axis=self.batch_axis) + 1e-12)

            if chw_order:
                if self.greyscale:
                    bX = np.expand_dims(bX, 3)
                bX = bX.transpose(0, 3, 1, 2)

            if self.y is not None:
                bslice = idxs[b * batch_size:b * batch_size + nb_samples]
                yield bX, self.y[bslice]
            else:
                yield bX

    return dtb.buffered_gen_threaded(gen_batch(), buffer_size=buffer_size)
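# dtb.buffered_gen_threaded comes from the repo's buffering utilities; the
# idea is a producer thread that keeps up to `buffer_size` ready minibatches
# in a queue so data loading and augmentation overlap with GPU compute. A
# minimal sketch of that pattern (not the repo's actual implementation):
import threading
import Queue  # `queue` on Python 3

def buffered_gen_threaded_sketch(source_gen, buffer_size=2):
    assert buffer_size >= 2, "minimum buffer size is 2"
    buffer = Queue.Queue(maxsize=buffer_size - 1)  # one extra item lives in the producer
    sentinel = object()  # marks exhaustion of the source generator

    def producer():
        for item in source_gen:
            buffer.put(item, block=True)
        buffer.put(sentinel)

    thread = threading.Thread(target=producer)
    thread.daemon = True
    thread.start()

    item = buffer.get()
    while item is not sentinel:
        yield item
        item = buffer.get()

# Typical use of get_batch (illustrative; the constructor arguments and data
# depend on how the surrounding generator class is configured):
#   for mb_x, mb_y in datagen.get_batch(batch_size=64, shuffle=True, rng_seed=42):
#       train_fn(mb_x, mb_y)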