def time_delay_generator(x, y, delays, batch_size, weights=None, shuffle=True):
    '''A generator to make it easy to fit time-delay regression models,
    i.e. a model where the value of y depends on past values of x

    # Arguments
        x: input data, as a Numpy array, or a list of Numpy arrays that are
            all indexed along axis 0 by the same sample index
        y: targets, as a Numpy array indexed as `y[ids, :]`, or None for
            prediction generation
        delays: number of time-steps to include in model (a Python or
            Numpy integer, expanded to `range(delays)`), or an explicit
            indexable sequence of delays
        batch_size: number of samples per batch
        weights: Numpy array of weights for the samples, indexed as
            `weights[ids, :]`; only its first column is used. When None,
            unit weights are generated.
        shuffle: Whether or not to shuffle the data (set True for training)

    # Yields
        `x_batch` when `y` is None, otherwise `(x_batch, y_batch, w_batch)`.
        Samples whose index is smaller than the last delay get weight 0.
        The generator loops over the data forever.

    # Example
        if X_train is (1000,200), Y_train is (1000,1)
        train_gen = time_delay_generator(X_train, Y_train, delays=10, batch_size=100)

        train_gen is a generator that gives:
        x_batch as size (100,10,200) since each of the 100 samples includes
        the input data at the current and nine previous time steps
        y_batch as size (100,1)
        w_batch as size (100,)
    '''
    # isinstance (rather than an exact type check) also accepts NumPy
    # integer scalars such as np.int64(10) and subclasses of list.
    if isinstance(delays, (int, np.integer)):
        delays = range(int(delays))
    if not isinstance(x, list):
        x = [x]

    n_samples = x[0].shape[0]
    index_array = np.arange(n_samples)
    # Fancy indexing with a list of index arrays puts the delay axis first;
    # these permutations swap it behind the sample axis.
    tlists = [[1, 0] + list(range(2, np.ndim(xx) + 1)) for xx in x]
    input_names = ['x_batch' + str(i) for i in range(1, len(x) + 1)]
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # Clip delayed indices to the valid range [0, n_samples - 1].
            batch_ids_delay = [
                np.minimum(np.maximum(0, batch_ids - d), n_samples - 1)
                for d in delays
            ]
            x_batch = _standardize_input_data(
                [xx[batch_ids_delay, :].transpose(tt)
                 for xx, tt in zip(x, tlists)],
                input_names)
            if y is None:
                yield x_batch
            else:
                y_batch = _standardize_input_data(y[batch_ids, :], ['y_batch'])
                if weights is not None:
                    w_batch = weights[batch_ids, :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                # Samples without a full history (their delayed indices were
                # clipped to 0) must not contribute to the loss.
                w_batch[batch_ids < delays[-1]] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
def _standardize_user_data(model, x, y, sample_weight=None, class_weight=None, check_batch_dim=True, batch_size=None): if not hasattr(model, 'optimizer'): raise Exception('You must compile a model before training/testing.' ' Use `model.compile(optimizer, loss)`.') output_shapes = [] for output_shape, loss_fn in zip(model.internal_output_shapes, model.loss_functions): if loss_fn.__name__ == 'sparse_categorical_crossentropy': output_shapes.append(output_shape[:-1] + (1, )) elif getattr(losses, loss_fn.__name__, None) is None: output_shapes.append(None) else: output_shapes.append(output_shape) x = _standardize_input_data(x, model.input_names, model.internal_input_shapes, exception_prefix='model input') y = _standardize_input_data(y, model.output_names, output_shapes, exception_prefix='model target') sample_weights = _standardize_sample_weights(sample_weight, model.output_names) class_weights = _standardize_class_weights(class_weight, model.output_names) sample_weights = [ _standardize_weights(ref, sw, cw, mode) for (ref, sw, cw, mode) in zip( y, sample_weights, class_weights, model.sample_weight_modes) ] ''' We only need to comment out check_array_lengeh(x, y, weights) in the next line to let the model compile and train. ''' # check_array_lengths(x, y, sample_weights) _check_loss_and_target_compatibility(y, model.loss_functions, model.internal_output_shapes) if model.stateful and batch_size: if x[0].shape[0] % batch_size != 0: raise Exception('In a stateful network, ' 'you should only pass inputs with ' 'a number of samples that can be ' 'divided by the batch size. Found: ' + str(x[0].shape[0]) + ' samples') return x, y, sample_weights
def time_delay_generator_jitter(x, y, delays, batch_size, weights=None,
                                shuffle=True, conv3d=False, jitter=True,
                                jitter_axes=[3,4], max_jitter=1):
    '''A generator to make it easy to fit time-delay regression models,
    i.e. a model where the value of y depends on past values of x.
    Optionally applies a random circular shift ("jitter") to each batch.

    # Arguments
        x: input data, as a Numpy array
        y: targets, as a Numpy array indexed as `y[ids, :]`, or None for
            prediction generation
        delays: number of time-steps to include in model (an integer)
        batch_size: number of samples per batch
        weights: Numpy array of weights for the samples, indexed as
            `weights[ids, :]`; only its first column is used. When None,
            unit weights are generated.
        shuffle: Whether or not to shuffle the data (set True for training)
        conv3d: if True the delay axis is moved to position 2 of the batch
            (axis order [1, 2, 0, 3, ...]) instead of position 1
        jitter: whether to roll each batch by a random amount
        jitter_axes: axes of the batch to roll; the default list is only
            iterated, never mutated
        max_jitter: the roll along each axis is drawn uniformly from the
            integers in [-max_jitter, max_jitter]; one draw per axis is
            shared by the whole batch

    # Yields
        `x_batch` when `y` is None, otherwise `(x_batch, y_batch, w_batch)`.
        Samples whose index is smaller than `delays` get weight 0.
        The generator loops over the data forever.

    # Example
        if X_train is (1000,200), Y_train is (1000,1)
        train_gen = time_delay_generator_jitter(X_train, Y_train, delays=10,
                                                batch_size=100, jitter=False)

        train_gen is a generator that gives:
        x_batch as size (100,10,200) since each of the 100 samples includes
        the input data at the current and nine previous time steps
        y_batch as size (100,1)
        w_batch as size (100,)
    '''
    n_samples = x.shape[0]
    index_array = np.arange(n_samples)
    # `range` must be materialised: list + range raises TypeError on Python 3.
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # One index array per delay, clipped at the first sample;
            # batch_ids[0] (delay 0) holds the undelayed sample indices.
            batch_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = x[batch_ids, :].transpose(tlist)
            if jitter:
                for j in jitter_axes:
                    x_batch = np.roll(
                        x_batch,
                        np.random.randint(-max_jitter, max_jitter + 1),
                        axis=j)
            x_batch = _standardize_input_data(x_batch, ['x_batch'])
            if y is None:
                yield x_batch
            else:
                y_batch = _standardize_input_data(y[batch_ids[0], :],
                                                  ['y_batch'])
                if weights is not None:
                    w_batch = weights[batch_ids[0], :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                # Samples without a full history must not contribute.
                w_batch[batch_ids[0] < delays] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
def _input_grad(self, x, layer, filter_slices=None, filter_func=None,
                filter_func_kwargs=None):
    """Compute gradients for a single batch of samples.

    Adapted from keras.engine.training.predict_on_batch.

    # Arguments
        x: Input samples, as a Numpy array.
        layer, filter_slices, filter_func, filter_func_kwargs: forwarded
            unchanged to `__generate_direct_saliency_functions__`.

    # Returns
        The single output of the generated function when it returns
        exactly one value, otherwise the full list of outputs.
    """
    from keras.engine.training import _standardize_input_data
    from keras import backend as K

    model = self.model
    ins = _standardize_input_data(x, model._feed_input_names,
                                  model._feed_input_shapes)
    # Feed the learning phase (0.) only when it is a placeholder rather
    # than a fixed integer.
    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
        ins = ins + [0.]

    grad_fn = self.__generate_direct_saliency_functions__(
        layer, filter_slices, filter_func, filter_func_kwargs)
    results = grad_fn(ins)
    return results[0] if len(results) == 1 else results
def time_delay_generator_conv(x, filt_length, frames_per_TR, TRs_in_model,
                              y=None, weights=None):
    '''A generator that endlessly yields overlapping windows of `x`.

    # Arguments
        x: input data, as a Numpy array indexed along axis 0
        filt_length: filter length; each window is
            `frames_per_TR*TRs_in_model + filt_length - 1` samples long
        frames_per_TR: number of frames per TR
        TRs_in_model: number of TRs covered by one window
        y: accepted but not used
        weights: accepted but not used

    # Yields
        `x_batch`: the standardized window, with a leading axis of size 1
        added. Indices past the end of `x` are clamped to its last sample.
    '''
    window = frames_per_TR*TRs_in_model + filt_length - 1
    n_frames = x.shape[0]
    # Extend the length to the next multiple of frames_per_TR (past the
    # first window) so the final window is complete.
    n_strides = np.ceil((n_frames - window)/float(frames_per_TR))
    x_size_expand = int(n_strides*frames_per_TR + window)
    overlap = frames_per_TR*(TRs_in_model-1)+filt_length-1
    batches = _make_batches_overlap(x_size_expand, window, overlap,
                                    filt_length)
    print(batches)
    # Clamp the padded index range to the last valid sample of x.
    index_array = np.minimum(np.arange(x_size_expand), n_frames - 1)
    while True:
        for span in batches:
            first, last = span
            window_ids = index_array[first:last]
            windowed = x[window_ids, :][None, :]
            yield _standardize_input_data(windowed, ['x_batch'])
def predict(self, x, batch_size=None, learning_phase=0., verbose=0,
            steps=None):
    """Generates output predictions for the input samples.

    Computation is done in batches. [A tweaked version: the learning phase
    value fed to the model can be chosen by the caller.]

    # Arguments
        x: the input data, as a Numpy array
            (or list of Numpy arrays if the model has multiple outputs).
        batch_size: integer. Defaults to 32 when both `batch_size` and
            `steps` are None.
        learning_phase: value appended to the inputs when the model uses a
            non-integer learning phase.
        verbose: verbosity mode, 0 or 1.
        steps: Total number of steps (batches of samples). Only used for
            argument validation here; it is not forwarded to
            `_predict_loop`.

    # Returns
        Numpy array(s) of predictions.

    # Raises
        ValueError: In case of mismatch between the provided
            input data and the model's expectations,
            or in case a stateful model receives a number of samples
            that is not a multiple of the batch size.
    """
    steps_missing = steps is None
    # Backwards compatibility.
    if steps_missing and batch_size is None:
        batch_size = 32
    if steps_missing and x is None:
        raise ValueError('If predicting from data tensors, '
                         'you should specify the `steps` '
                         'argument.')

    # Validate user data.
    x = _standardize_input_data(x, self._feed_input_names,
                                self._feed_input_shapes,
                                check_batch_axis=False)
    if self.stateful:
        n_samples = x[0].shape[0]
        if n_samples > batch_size and n_samples % batch_size != 0:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(n_samples) + ' samples. '
                             'Batch size: ' + str(batch_size) + '.')

    # Prepare inputs, delegate logic to _predict_loop.
    ins = x
    if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
        ins = x + [learning_phase]
    self._make_predict_function()
    return self._predict_loop(self.predict_function, ins,
                              batch_size=batch_size, verbose=verbose)
def standardize_predict_inputs(model: Model, x: np.ndarray) -> List[np.ndarray]:
    """Build the input list expected by a prediction function of `model`.

    `x` is standardized against the model's feed input names and shapes.
    When the model uses a learning phase that is not a fixed integer, the
    test-phase flag `0.` is appended to the returned list.
    """
    standardized = _standardize_input_data(x, model._feed_input_names,
                                           model._feed_input_shapes)
    if not model.uses_learning_phase:
        return standardized
    if isinstance(K.learning_phase(), int):
        return standardized
    return standardized + [0.]
def predict(self, X, X_tr=None, Y_tr=None, batch_size=32, return_var=False,
            verbose=0):
    """Generate output predictions for the input samples batch by batch.

    Arguments:
    ----------
        X : np.ndarray or list of np.ndarrays
        X_tr : np.ndarray or list of np.ndarrays, optional
        Y_tr : np.ndarray or list of np.ndarrays, optional
            When both `X_tr` and `Y_tr` are given, the training data held
            by every output GP layer (and its grid, if the layer requests
            it) is updated before predicting.
        batch_size : uint (default: 32)
        return_var : bool (default: False)
            Whether predictive variance is returned.
        verbose : uint (default: 0)
            Verbosity mode, 0 or 1. Not used by this method.

    Returns:
    --------
        preds : list
            With `return_var=False`, one entry per output GP layer holding
            whatever that layer's backend `predict` returns. With
            `return_var=True`, the per-layer results are transposed into a
            list of lists (e.g. `[means, variances]`), each with one entry
            per output GP layer.
    """
    # Update GP data if provided (and grid if necessary)
    if X_tr is not None and Y_tr is not None:
        X_tr, Y_tr, _ = self._standardize_user_data(
            X_tr, Y_tr,
            sample_weight=None,
            class_weight=None,
            check_batch_axis=False,
            batch_size=batch_size)
        H_tr = self.transform(X_tr, batch_size=batch_size)
        for gp, h, y in zip(self.output_gp_layers, H_tr, Y_tr):
            gp.backend.update_data('tr', h, y)
            if gp.update_grid:
                gp.backend.update_grid('tr')

    # Validate user data
    X = _standardize_input_data(
        X, self._feed_input_names, self._feed_input_shapes,
        check_batch_axis=False, exception_prefix='input')

    H = self.transform(X, batch_size=batch_size)

    preds = [gp.backend.predict(h, return_var=return_var)
             for gp, h in zip(self.output_gp_layers, H)]

    if return_var:
        # Materialise the transposition: on Python 3 a bare `map(...)` is a
        # lazy one-shot iterator that cannot be indexed or re-read.
        preds = [list(group) for group in zip(*preds)]

    return preds
def time_delay_generator_AE(x, delays, batch_size, shuffle=True, conv3d=False):
    '''A generator to make it easy to fit time-delay auto-encoder models,
    i.e. models whose target is the (flattened) time-delayed input itself.

    # Arguments
        x: input data, as a Numpy array
        delays: number of time-steps to include in model (an integer)
        batch_size: number of samples per batch
        shuffle: Whether or not to shuffle the data (set True for training)
        conv3d: if True the delay axis is moved to position 2 of the batch
            (axis order [1, 2, 0, 3, ...]) instead of position 1

    # Yields
        `(x_batch, y_batch)` where `y_batch` is a copy of the first array
        of `x_batch` reshaped to `(n_samples_in_batch, -1)`.
        The generator loops over the data forever.

    # Example
        if X_train is (1000,200)
        train_gen = time_delay_generator_AE(X_train, delays=10, batch_size=100)

        train_gen is a generator that gives:
        x_batch as size (100,10,200) since each of the 100 samples includes
        the input data at the current and nine previous time steps
        y_batch as size (100,2000)
    '''
    n_samples = x.shape[0]
    index_array = np.arange(n_samples)
    # `range` must be materialised: list + range raises TypeError on Python 3.
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            # One index array per delay, clipped at the first sample.
            batch_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = _standardize_input_data(
                x[batch_ids, :].transpose(tlist), ['x_batch'])
            # The auto-encoder target is the flattened input batch.
            y_batch = _standardize_input_data(
                np.copy(x_batch[0]).reshape((x_batch[0].shape[0], -1)),
                ['y_batch'])
            yield (x_batch, y_batch)
def generate_training_data(train_gen, batch_num):
    """Collect up to `batch_num` batches from the generators in `train_gen`.

    The generators are visited in mapping order and re-started (via
    `get_batch_generator()`) as often as needed until enough batches have
    been gathered.

    # Arguments
        train_gen: mapping of tag -> object exposing `get_batch_generator()`,
            which yields `(input_data, y_true_value)` pairs.
        batch_num: number of batches to collect.

    # Returns
        Tuple `(zoo_input_data, zoo_label)`: a list of standardized
        `['query', 'doc']` input lists and a list of labels with an extra
        axis inserted at position 1. Fewer than `batch_num` entries are
        returned only when the generators produce no data at all; the
        original implementation looped forever in that case.
    """
    names = ['query', 'doc']
    shapes = [(None, 10), (None, 40)]
    zoo_input_data = []
    zoo_label = []

    # Nothing to collect, or nothing to collect from.
    if batch_num <= 0 or not train_gen:
        return (zoo_input_data, zoo_label)

    count = 0
    while True:
        produced_any = False
        for generator in train_gen.values():
            for input_data, y_true_value in generator.get_batch_generator():
                produced_any = True
                count += 1
                if count > batch_num:
                    return (zoo_input_data, zoo_label)
                zoo_input_data.append(
                    _standardize_input_data(input_data, names, shapes,
                                            check_batch_axis=False))
                zoo_label.append(np.expand_dims(y_true_value, 1))
        # A full pass over every generator yielded nothing: cycling again
        # would spin forever, so return what has been collected.
        if not produced_any:
            return (zoo_input_data, zoo_label)
def eval(eval_gen, eval_metrics, zmodel):
    """Evaluate `zmodel` on every generator in `eval_gen` and print metrics.

    For each `(tag, generator)` pair, every batch is standardized into
    `['query', 'doc']` inputs, concatenated along axis 1 and passed to
    `zmodel.forward`. Each metric in `eval_metrics` is accumulated per list
    (for `ListBasicGenerator` subclasses, using `input_data['list_counts']`
    as list boundaries) or per batch (otherwise), then averaged over the
    number of accumulated items and printed. The generator is reset after
    its batches are consumed.
    """
    names = ['query', 'doc']
    shapes = [(None, 10), (None, 40)]
    for tag, generator in eval_gen.items():
        genfun = generator.get_batch_generator()
        now = time.strftime('%m-%d-%Y %H:%M:%S', time.localtime(time.time()))
        print('[%s]\t[Eval:%s] ' % (now, tag), end='')
        res = {name: 0. for name in eval_metrics.keys()}
        num_valid = 0
        for input_data, y_true in genfun:
            query_arr, doc_arr = _standardize_input_data(
                input_data, names, shapes, check_batch_axis=False)[:2]
            y_pred = zmodel.forward(
                np.concatenate((query_arr, doc_arr), axis=1))
            is_list_generator = issubclass(
                type(generator), inputs.list_generator.ListBasicGenerator)
            if not is_list_generator:
                # Point/pair generators: one metric evaluation per batch.
                for name, eval_func in eval_metrics.items():
                    res[name] += eval_func(y_true=y_true, y_pred=y_pred)
                num_valid += 1
                continue
            # List generators: consecutive entries of list_counts delimit
            # the documents that belong to one query.
            list_counts = input_data['list_counts']
            n_lists = len(list_counts) - 1
            for name, eval_func in eval_metrics.items():
                for pos in range(n_lists):
                    lo, hi = list_counts[pos], list_counts[pos + 1]
                    res[name] += eval_func(y_true=y_true[lo:hi],
                                           y_pred=y_pred[lo:hi])
            num_valid += n_lists
        generator.reset()
        summary = '\t'.join(
            ['%s=%f' % (name, total / num_valid)
             for name, total in res.items()])
        print('Iter:%d\t%s' % (0, summary), end='\n')
        sys.stdout.flush()
def predict(self, x, batch_size=32, learning_phase=0., verbose=0):
    """Generates output predictions for the input samples.

    Computation is done in batches.

    # Arguments
        x: the input data, as a Numpy array
            (or list of Numpy arrays if the model has multiple outputs).
        batch_size: integer.
        learning_phase: value appended to the inputs when the model uses a
            non-integer learning phase.
        verbose: verbosity mode, 0 or 1.

    # Returns
        Numpy array(s) of predictions.

    # Raises
        ValueError: In case of mismatch between the provided
            input data and the model's expectations,
            or in case a stateful model receives a number of samples
            that is not a multiple of the batch size.
    """
    # Validate user data.
    inputs_std = _standardize_input_data(x, self._feed_input_names,
                                         self._feed_input_shapes,
                                         check_batch_axis=False)
    if self.stateful:
        sample_count = inputs_std[0].shape[0]
        not_divisible = sample_count % batch_size != 0
        if sample_count > batch_size and not_divisible:
            raise ValueError('In a stateful network, '
                             'you should only pass inputs with '
                             'a number of samples that can be '
                             'divided by the batch size. Found: ' +
                             str(sample_count) + ' samples. '
                             'Batch size: ' + str(batch_size) + '.')

    # Prepare inputs, delegate logic to _predict_loop.
    feed_phase = (self.uses_learning_phase
                  and not isinstance(K.learning_phase(), int))
    ins = inputs_std + [learning_phase] if feed_phase else inputs_std
    self._make_predict_function()
    predict_fn = self.predict_function
    return self._predict_loop(predict_fn, ins,
                              batch_size=batch_size, verbose=verbose)
def finetune(self, X, Y, batch_size=32, gp_n_iter=1, verbose=1):
    """Finetune the output GP layers assuming the network is pre-trained.

    Arguments:
    ----------
        X : np.ndarray or list of np.ndarrays
        Y : np.ndarray or list of np.ndarrays
        batch_size : uint (default: 32)
            Batch size used for data streaming through the network.
        gp_n_iter : uint (default: 1)
            Number of iterations for GP training.
        verbose : uint (default: 1)
            Verbosity mode, 0 or 1.
    """
    # Validate user data
    standardized_X = _standardize_input_data(
        X, self._feed_input_names, self._feed_input_shapes,
        check_batch_axis=False, exception_prefix='input')

    features = self.transform(standardized_X, batch_size=batch_size)

    if verbose:
        print("Finetuning output GPs...")

    layer_data = zip(self.output_gp_layers, features, Y)
    for gp_layer, layer_features, layer_targets in layer_data:
        # Update GP data (and grid if necessary)
        gp_layer.backend.update_data('tr', layer_features, layer_targets)
        if gp_layer.update_grid:
            gp_layer.backend.update_grid('tr')

        # Train GP
        trained_hyp = gp_layer.backend.train(gp_n_iter, verbose=verbose)
        gp_layer.hyp = trained_hyp

    if verbose:
        print("Done.")
def keras_generator(self, delays=7, batch_size=400, cell=0, scale=5,
                    flatten=True, center=None, crop_size=None, shuffle=True,
                    color_chan=False, log_transform_events=True,
                    correct_eye_pos=False, gaussian_filter=0):
    '''Endless generator of `(x_batch, y_batch, w_batch)` for Keras.

    # Arguments
        delays: number of time-steps to include (a Python or Numpy integer,
            expanded to `range(delays)`) or an explicit sequence of delays;
            also passed to `self.vectorize_data`
        batch_size: number of samples per batch
        cell: index (integer) or list of indices selecting rows of the
            events array
        scale: multiplier applied to the selected events
        flatten: if True, the stimulus is reshaped to `(n_frames, -1)`
        center, crop_size: when both are given, the stimulus is cropped to
            `crop_size // 2` pixels on either side of `center` along
            axes 1 and 2
        shuffle: whether to shuffle the sample order on every pass
        color_chan: if True, a singleton axis is inserted at position 1
        log_transform_events: if True, events become `log(1 + events)`
        correct_eye_pos: if True, every sample's frame is pasted into a
            larger zero-filled canvas at the offset given by its shift;
            samples with a NaN shift (and the samples `d` steps after them,
            for every delay `d`) get weight 0
        gaussian_filter: when > 0, passed as sigma to `gaussian_filter1d`

    # Yields
        `(x_batch, y_batch, w_batch)`; samples whose frame number is smaller
        than the last delay get weight 0.
    '''
    from keras.engine.training import _standardize_input_data, _make_batches, _standardize_sample_weights

    # isinstance also accepts NumPy integer scalars.
    if isinstance(cell, (int, np.integer)):
        cell = [cell]
    if isinstance(delays, (int, np.integer)):
        delays = range(int(delays))

    (stim, events, frame_numbers, weights, shifts) = self.vectorize_data(delays)
    evidx = np.where(events)[0]
    print(str(len(frame_numbers)) + ' Samples')
    print(str(len(evidx)) + ' Events')

    if correct_eye_pos:
        sh = stim.shape
        # Canvas large enough for the maximum shift, plus a little slack.
        shift_stim_shape = (
            len(shifts),
            sh[1] + 2*np.maximum(self.min_max_shift[1][0],
                                 -self.min_max_shift[0][0]) + 3,
            sh[2] + 2*np.maximum(self.min_max_shift[1][1],
                                 -self.min_max_shift[0][1]) + 3)
        out_stim = np.zeros(shift_stim_shape, dtype='float32')
        # Floor division throughout: slice bounds must be integers, and `/`
        # yields floats on Python 3. `//` matches Python 2 integer `/`.
        shifts = shifts + [shift_stim_shape[1] // 2, shift_stim_shape[2] // 2]
        missing_shift = np.isnan(shifts[:, 0])
        good_shift_locations = ~missing_shift
        missing_rows = np.where(missing_shift)[0]
        for dd in delays:
            # A sample with no shift also invalidates the later samples
            # whose delayed history includes it.
            weights[np.minimum(missing_rows + dd, len(weights) - 1)] = 0
        for i in range(len(shifts)):
            if good_shift_locations[i]:
                row = np.int32(shifts[i, 0])
                col = np.int32(shifts[i, 1])
                out_stim[i,
                         -sh[1] // 2 + row:row + sh[1] // 2,
                         -sh[2] // 2 + col:col + sh[2] // 2] = stim[frame_numbers[i]]
        stim = out_stim
        # out_stim holds one frame per sample, so samples index it directly.
        frame_numbers_i = np.arange(len(frame_numbers))
    else:
        frame_numbers_i = frame_numbers

    if color_chan:
        stim = stim[:, None, :, :]
    if crop_size is not None and center is not None:
        half_crop = crop_size // 2
        stim = stim[:,
                    (center[0] - half_crop):(center[0] + half_crop),
                    (center[1] - half_crop):(center[1] + half_crop)]
    if flatten:
        stim = stim.reshape(stim.shape[0], -1)

    events = np.asarray(events)
    events = events[cell].T * scale
    if log_transform_events:
        events = np.log(1 + events)
    if gaussian_filter > 0:
        # NOTE(review): gaussian_filter1d filters along the last axis by
        # default, which after the transpose above is the cell axis, not
        # time -- confirm this is intended.
        events = gaussian_filter1d(events, gaussian_filter)

    n_samples = events.shape[0]
    index_array = np.arange(n_samples)
    tlist = [1, 0] + list(range(2, np.ndim(stim) + 1))
    batches = _make_batches(n_samples, batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_start, batch_end in batches:
            batch_ids = index_array[batch_start:batch_end]
            frame_numbers_b = frame_numbers[batch_ids]
            # One stimulus index array per delay, clipped at sample 0.
            batch_ids_stim = [frame_numbers_i[np.maximum(0, batch_ids - d)]
                              for d in delays]
            x_batch = _standardize_input_data(
                stim[batch_ids_stim, :].transpose(tlist), ['x_batch'])
            y_batch = _standardize_input_data(events[batch_ids, :],
                                              ['y_batch'])
            w_batch = weights[batch_ids]
            # Frames without a full stimulus history must not contribute.
            w_batch[frame_numbers_b < delays[-1]] = 0.
            w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
            yield (x_batch, y_batch, w_batch)
def keras_generator(self, event_type='OASIS', delays=7, batch_size=400,
                    shift=True):
    '''Generator of `(x_batch, resp_out)` pairs over all datasets and movies.

    For every dataset and every movie it contains, the movie is warped to
    screen coordinates (computed once, cached in `self._movie_warps` and
    pickled under `/tmp/`), then cut into chunks of `batch_size` frames.

    # Arguments
        event_type: key of `self.events` selecting the responses to use
        delays: number of time-steps (an integer) stacked into each sample
        batch_size: number of frames per yielded chunk
        shift: if True, each frame is pasted into a larger zero-filled
            canvas at the offset given by its shift location (frames with a
            NaN shift stay zero); if False, frames are copied unchanged

    # Yields
        `(x_batch, resp_out)` where `resp_out` is the slice of the
        responses that matches the chunk's frames.

    # Raises
        ValueError: if `event_type` is not a key of `self.events`.
    '''
    from keras.engine.training import _standardize_input_data

    if event_type not in self.events.keys():
        raise ValueError('Please specifiy one of the following for event_type: ' + str(self.events.keys()))

    movie_dict = self._movie_warps
    for (ds, msl, sl, cfn, dff, ci) in zip(self.datasets,
                                           self._movie_sample_list,
                                           self.shift_locs,
                                           self.corrected_frame_numbers,
                                           self.events[event_type],
                                           self.cell_indicies):
        for (movie_name, sl2, cfn2, dff2) in zip(msl[0], sl, cfn, dff):
            if movie_name not in movie_dict:
                tmp_movie = self._get_stimulus_template(ds, movie_name)
                # Warp the first frame up front to learn the output size.
                tmp = self.warp_movie_to_screen(tmp_movie[0], movie_name)
                tmp_warp = np.zeros(
                    (len(tmp_movie), tmp.shape[0], tmp.shape[1]),
                    dtype='uint8')
                for i in range(len(tmp_movie)):
                    tmp_warp[i] = self.warp_movie_to_screen(tmp_movie[i],
                                                            movie_name)
                with open('/tmp/' + movie_name + '_' + str(self.downsample) + '.pickle', 'wb') as handle:
                    pickle.dump(tmp_warp, handle,
                                protocol=pickle.HIGHEST_PROTOCOL)
                movie_dict[movie_name] = tmp_warp

            original_stim = movie_dict[movie_name]
            frame_numbers = cfn2
            shift_locations = sl2
            resp = dff2
            sh = original_stim.shape

            for cut in range(0, len(frame_numbers), batch_size):
                sl3 = shift_locations[cut:cut+batch_size]
                fn = frame_numbers[cut:cut+batch_size]
                resp_out = resp[:, cut:cut+batch_size]

                # make larger stim defined by maximum shifts with a little
                # extra slack
                shift_stim_shape = (
                    len(sl3),
                    sh[1] + 2*np.maximum(self.min_max_shift[1][0],
                                         -self.min_max_shift[0][0]) + 2,
                    sh[2] + 2*np.maximum(self.min_max_shift[1][1],
                                         -self.min_max_shift[0][1]) + 2)
                if shift:
                    out_stim = np.zeros(shift_stim_shape, dtype='float32')
                else:
                    out_stim = np.zeros((len(sl3), sh[1], sh[2]),
                                        dtype='float32')

                # Floor division throughout: slice bounds must be integers,
                # and `/` yields floats on Python 3. `//` matches Python 2
                # integer `/`.
                sl3 = sl3 + [shift_stim_shape[1] // 2,
                             shift_stim_shape[2] // 2]
                good_shift_locations = ~np.isnan(sl3[:, 0])

                for i in range(len(sl3)):
                    if shift:
                        if good_shift_locations[i]:
                            row = np.int32(sl3[i, 0])
                            col = np.int32(sl3[i, 1])
                            out_stim[i,
                                     -sh[1] // 2 + row:row + sh[1] // 2,
                                     -sh[2] // 2 + col:col + sh[2] // 2] = original_stim[fn[i]]
                    else:
                        out_stim[i] = original_stim[fn[i]]

                x = out_stim
                batch_ids = np.arange(x.shape[0])
                tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
                # One index array per delay, clipped at the chunk's first
                # frame.
                batch_ids = [np.maximum(0, batch_ids - d)
                             for d in range(delays)]
                x_batch = _standardize_input_data(
                    x[batch_ids, :].transpose(tlist), ['x_batch'])
                yield (x_batch, resp_out)
def fit(self, X, Y, X_U, batch_size=32, epochs=1, gp_n_iter=1, verbose=1,
        callbacks=None, validation_split=0., validation_data=None,
        shuffle=True, class_weight=None, sample_weight=None,
        initial_epoch=0, **kwargs):
    """Trains the model for a fixed number of epochs (iterations on a dataset).

    `X`/`Y` are the labeled data and `X_U` the unlabeled inputs; an
    `UpdateSSDKL` callback built from them is placed in front of any
    user-supplied callbacks. For the remaining argument details, refer to
    `keras.engine.training.Model.fit`.

    Notes:
        The following arguments are currently unsupported by models with GP
        output layers:
            - validation_split
            - class_weight
            - sample_weight
        `validation_data` is standardized and handed to the `UpdateSSDKL`
        callback only; it is not forwarded to the parent `fit`.
    """
    # Shared options for every labeled-data standardization call.
    standardize_opts = dict(sample_weight=None,
                            class_weight=None,
                            check_batch_axis=False,
                            batch_size=batch_size)

    # Validate user data
    X, Y, _ = self._standardize_user_data(X, Y, **standardize_opts)
    if validation_data is not None:
        val_x, val_y, _ = self._standardize_user_data(*validation_data,
                                                      **standardize_opts)
        validation_data = (val_x, val_y)
    X_U = _standardize_input_data(X_U,
                                  self._feed_input_names,
                                  self._feed_input_shapes,
                                  check_batch_axis=False,
                                  exception_prefix='input')

    # Setup GP updates
    gp_updater = UpdateSSDKL(ins=(X, Y),
                             unlabeled_ins=X_U,
                             val_ins=validation_data,
                             batch_size=batch_size,
                             gp_n_iter=gp_n_iter,
                             verbose=verbose)
    all_callbacks = [gp_updater] + (callbacks or [])

    return super(Model, self).fit(X, Y,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  verbose=verbose,
                                  callbacks=all_callbacks,
                                  shuffle=shuffle,
                                  initial_epoch=initial_epoch,
                                  **kwargs)
def predict(config):
    """Run the PREDICT phase described by `config` and report metrics.

    Reads `config['inputs']` (including its `'share'` section),
    `config['outputs']`, `config['global']`, `config['metrics']` and
    `config['net_name']`. For every input tag whose phase is `'PREDICT'`
    a data generator is built; each batch is scored by both models returned
    by `load_model(config)` and the two predictions are compared with
    `np.allclose` (the result is printed). Metrics are accumulated and
    printed per tag, and scores are optionally written in `TREC` or
    `TEXTNET` format to the path configured for that tag in
    `config['outputs']`.
    """
    ######## Read input config ########
    print(json.dumps(config, indent=2), end='\n')
    input_conf = config['inputs']
    share_input_conf = input_conf['share']

    # collect embedding: either loaded from `embed_path` (with a zero vector
    # for the padding id, which is the last vocabulary index) or random.
    if 'embed_path' in share_input_conf:
        embed_dict = read_embedding(filename=share_input_conf['embed_path'])
        _PAD_ = share_input_conf['vocab_size'] - 1
        embed_dict[_PAD_] = np.zeros((share_input_conf['embed_size'], ),
                                     dtype=np.float32)
        embed = np.float32(
            np.random.uniform(-0.02, 0.02, [
                share_input_conf['vocab_size'], share_input_conf['embed_size']
            ]))
        share_input_conf['embed'] = convert_embed_2_numpy(embed_dict,
                                                          embed=embed)
    else:
        embed = np.float32(
            np.random.uniform(-0.2, 0.2, [
                share_input_conf['vocab_size'], share_input_conf['embed_size']
            ]))
        share_input_conf['embed'] = embed
    print('[Embedding] Embedding Load Done.', end='\n')

    # list all input tags and construct tags config: each PREDICT tag gets
    # the shared settings overlaid with its own.
    input_predict_conf = OrderedDict()
    for tag in input_conf.keys():
        if 'phase' not in input_conf[tag]:
            continue
        if input_conf[tag]['phase'] == 'PREDICT':
            input_predict_conf[tag] = {}
            input_predict_conf[tag].update(share_input_conf)
            input_predict_conf[tag].update(input_conf[tag])
    print('[Input] Process Input Tags. %s in PREDICT.' %
          (input_predict_conf.keys()), end='\n')

    # collect dataset identification: every corpus file is read only once.
    # NOTE(review): unlike the loop above, this one indexes ['phase'] without
    # checking that the key exists, so a non-'share' tag lacking 'phase'
    # raises KeyError here -- confirm all tags define it.
    dataset = {}
    for tag in input_conf:
        if tag == 'share' or input_conf[tag]['phase'] == 'PREDICT':
            if 'text1_corpus' in input_conf[tag]:
                datapath = input_conf[tag]['text1_corpus']
                if datapath not in dataset:
                    dataset[datapath], _ = read_data(datapath)
            if 'text2_corpus' in input_conf[tag]:
                datapath = input_conf[tag]['text2_corpus']
                if datapath not in dataset:
                    dataset[datapath], _ = read_data(datapath)
    print('[Dataset] %s Dataset Load Done.' % len(dataset), end='\n')

    # initial data generator
    predict_gen = OrderedDict()
    for tag, conf in input_predict_conf.items():
        print(conf, end='\n')
        conf['data1'] = dataset[conf['text1_corpus']]
        conf['data2'] = dataset[conf['text2_corpus']]
        generator = inputs.get(conf['input_type'])
        predict_gen[tag] = generator(
            #data1 = dataset[conf['text1_corpus']],
            #data2 = dataset[conf['text2_corpus']],
            config=conf)

    ######## Read output config ########
    output_conf = config['outputs']

    ######## Load Model ########
    global_conf = config["global"]
    # NOTE(review): weights_file is computed but not used anywhere in this
    # function.
    weights_file = str(global_conf['weights_file']) + '.' + str(
        global_conf['test_weights_iters'])

    zmodel, kmodel = load_model(config)
    # test y_pred from zoo model and keras model
    # keras2_y_pred = kmodel.predict(input_data, batch_size=batch_size)
    # y_pred = model.forward(input_data)
    # # y_pred = model.predict(input_data, distributed=False)
    # equal = np.allclose(y_pred, keras2_y_pred, rtol=1e-5, atol=1e-5)
    # print(equal)
    # return y_pred

    # Metric names of the form 'name@k' are built by calling the metric
    # factory with the integer k.
    eval_metrics = OrderedDict()
    for mobj in config['metrics']:
        mobj = mobj.lower()
        if '@' in mobj:
            mt_key, mt_val = mobj.split('@', 1)
            eval_metrics[mobj] = metrics.get(mt_key)(int(mt_val))
        else:
            eval_metrics[mobj] = metrics.get(mobj)
    # NOTE(review): `res` is initialised once here, while `num_valid` is
    # reset for every tag below, so with several PREDICT tags the later
    # averages include earlier tags' totals -- confirm this is intended.
    res = dict([[k, 0.] for k in eval_metrics.keys()])

    # batch_size = 20
    # query_data = np.random.randint(0, 10000, [batch_size, 10])
    # doc_data = np.random.randint(0, 10000, [batch_size, 40])
    # input_data = [query_data, doc_data]
    # keras2_y_pred = keras2_model.predict(input_data, batch_size=batch_size)
    # y_pred = model.predict(input_data, distributed=False)
    # equal = np.allclose(y_pred, keras2_y_pred, rtol=1e-5, atol=1e-5)
    for tag, generator in predict_gen.items():
        genfun = generator.get_batch_generator()
        print('[%s]\t[Predict] @ %s ' % (time.strftime(
            '%m-%d-%Y %H:%M:%S', time.localtime(time.time())), tag), end='')
        num_valid = 0
        # res_scores maps p[0] -> {p[1]: (score, ground truth)} for each
        # entry p of input_data['ID'] (presumably query id and doc id).
        res_scores = {}
        for input_data, y_true in genfun:
            ky_pred = kmodel.predict(input_data, batch_size=len(y_true))
            names = ['query', 'doc']
            shapes = [(None, 10), (None, 40)]
            list_input_data = _standardize_input_data(input_data, names,
                                                      shapes,
                                                      check_batch_axis=False)
            # list_input_data = [data[0:2, :] for data in list_input_data]
            # y_pred = zmodel.predict(list_input_data, distributed=False)
            y_pred = zmodel.forward(list_input_data)
            # Sanity check: both models should agree on this batch.
            equal = np.allclose(y_pred, ky_pred, rtol=1e-5, atol=1e-5)
            print(equal)

            if issubclass(type(generator),
                          inputs.list_generator.ListBasicGenerator):
                # Consecutive entries of list_counts delimit one list.
                list_counts = input_data['list_counts']
                for k, eval_func in eval_metrics.items():
                    for lc_idx in range(len(list_counts) - 1):
                        pre = list_counts[lc_idx]
                        suf = list_counts[lc_idx + 1]
                        res[k] += eval_func(y_true=y_true[pre:suf],
                                            y_pred=y_pred[pre:suf])

                y_pred = np.squeeze(y_pred)
                for lc_idx in range(len(list_counts) - 1):
                    pre = list_counts[lc_idx]
                    suf = list_counts[lc_idx + 1]
                    for p, y, t in zip(input_data['ID'][pre:suf],
                                       y_pred[pre:suf], y_true[pre:suf]):
                        if p[0] not in res_scores:
                            res_scores[p[0]] = {}
                        res_scores[p[0]][p[1]] = (y, t)

                num_valid += len(list_counts) - 1
            else:
                for k, eval_func in eval_metrics.items():
                    res[k] += eval_func(y_true=y_true, y_pred=y_pred)
                for p, y, t in zip(input_data['ID'], y_pred, y_true):
                    if p[0] not in res_scores:
                        res_scores[p[0]] = {}
                    # Element 1 of both the prediction and the target is
                    # stored here.
                    res_scores[p[0]][p[1]] = (y[1], t[1])
                num_valid += 1
        generator.reset()

        # Optionally dump the scores, highest score first within each
        # first-level key.
        if tag in output_conf:
            if output_conf[tag]['save_format'] == 'TREC':
                with open(output_conf[tag]['save_path'], 'w') as f:
                    for qid, dinfo in res_scores.items():
                        dinfo = sorted(dinfo.items(),
                                       key=lambda d: d[1][0],
                                       reverse=True)
                        for inum, (did, (score, gt)) in enumerate(dinfo):
                            f.write('%s\tQ0\t%s\t%d\t%f\t%s\t%s\n' %
                                    (qid, did, inum, score,
                                     config['net_name'], gt))
            elif output_conf[tag]['save_format'] == 'TEXTNET':
                with open(output_conf[tag]['save_path'], 'w') as f:
                    for qid, dinfo in res_scores.items():
                        dinfo = sorted(dinfo.items(),
                                       key=lambda d: d[1][0],
                                       reverse=True)
                        for inum, (did, (score, gt)) in enumerate(dinfo):
                            f.write('%s %s %s %s\n' % (gt, qid, did, score))

        # NOTE(review): raises ZeroDivisionError if the generator produced
        # no batches (num_valid == 0).
        print('[Predict] results: ',
              '\t'.join(['%s=%f' % (k, v / num_valid)
                         for k, v in res.items()]), end='\n')
        sys.stdout.flush()
def predict(model, batch_size, num_outputs, save_path, evaluate=False,
            liver_only=False, save_predictions=False, initial_epoch=0,
            **kwargs):
    """Predict (and optionally evaluate) `model` on every data source.

    `prepare_model` supplies the model, a dict of callbacks and a dict of
    data generators. Each volume produced by a generator's flow is pushed
    through the model in slices of `batch_size`; predictions are written to
    a zarr group and, when `evaluate` is True, loss/metric values are fed to
    the validation callbacks and printed at the end.

    Arguments as used by the visible code:
        model, num_outputs, liver_only, evaluate, **kwargs: forwarded to
            `prepare_model`.
        batch_size: number of slices per prediction call.
        num_outputs: must be 1 or 2 (otherwise ValueError is raised).
        save_path: directory in which "predictions.zarr" is created when
            `save_predictions` is True.
        initial_epoch: accepted but not used in this function.

    NOTE(review): the original indentation was lost; the nesting below is
    reconstructed from the code's data flow and should be confirmed,
    especially the level of the final `if evaluate:` block.
    """
    model, callbacks, gen = prepare_model(model=model,
                                          num_outputs=num_outputs,
                                          liver_only=liver_only,
                                          evaluate=evaluate,
                                          **kwargs)

    # Set up prediction file.
    # NOTE(review): os.remove only deletes files; if the zarr store is a
    # directory this call fails -- confirm the store type.
    if save_predictions:
        save_path = os.path.join(save_path, "predictions.zarr")
        if os.path.exists(save_path):
            os.remove(save_path)

    # Initialize callbacks
    val_callback_list = [BaseLogger()]
    if not liver_only:
        val_callback_list.extend(
            [callbacks['dice_lesion'], callbacks['dice_lesion_inliver']])
    if len(model.outputs) == 2 or liver_only:
        val_callback_list.append(callbacks['dice_liver'])
    val_callbacks = CallbackList(val_callback_list)
    val_callbacks.set_params({
        'nb_epoch': 0,
        'nb_sample': 0,
        'verbose': False,
        'do_validation': True,
        'metrics': model.metrics_names
    })
    val_callbacks.on_train_begin()
    val_callbacks.on_epoch_begin(0)

    # Create theano function
    if evaluate:
        # Joint function: returns the predictions followed by the total
        # loss and the metric tensors.
        inputs = model.inputs + model.targets + model.sample_weights
        if model.uses_learning_phase and \
                not isinstance(K.learning_phase(), int):
            inputs += [K.learning_phase()]
        predict_function = K.function(inputs,
                                      model.outputs + [model.total_loss] +
                                      model.metrics_tensors,
                                      updates=model.state_updates)
    else:
        # NOTE(review): `inputs` aliases `model.inputs` here, so if that is
        # a list the `+=` below extends the model's own input list in place.
        inputs = model.inputs
        if model.uses_learning_phase and \
                not isinstance(K.learning_phase(), int):
            inputs += [K.learning_phase()]
        predict_function = K.function(inputs,
                                      model.outputs,
                                      updates=model.state_updates)

    # Predict for all data.
    print(' > Predicting...')
    for key in gen:
        print(' - DATA: {}'.format(key))

        # Duplicate inputs and outputs (and add outputs) as necessary.
        flow = repeat_flow(gen[key].flow(), num_outputs=num_outputs)

        # Set up file.
        if save_predictions:
            zgroup = zarr.open_group(store=save_path, mode='a', path="/")
            zarr_kwargs = {
                'chunks': (1, 512, 512),
                'compressor': zarr.Blosc(cname='lz4', clevel=9, shuffle=1)
            }

        # Predict and write to file.
        batch_num = 0
        for vol_num, volume in enumerate(flow):
            print("Predicting on `{}` - {}/{}"
                  "".format(key, vol_num + 1, len(gen[key])))

            # Begin writing to file.
            if save_predictions:
                # The last element of `volume` is used as the group name.
                vol_idx = volume[-1]
                subgroup = zgroup.create_group(str(vol_idx))
                # Channels of all outputs are stored side by side (they are
                # concatenated along axis 1 further down).
                num_channels = np.sum(model.output_shape[i][1] \
                                      for i in range(num_outputs))
                output_shape = \
                    (len(volume[0]), num_channels)+model.output_shape[0][2:]
                subgroup.empty("volume",
                               shape=output_shape,
                               dtype=np.float32,
                               **zarr_kwargs)
                segmentation = volume[1]
                if isinstance(segmentation, list):
                    segmentation = segmentation[0]
                subgroup.create_dataset("segmentation",
                                        shape=segmentation.shape,
                                        data=segmentation,
                                        dtype=np.int16,
                                        **zarr_kwargs)

            # Iterate through volume batch-wise.
            for idx0, idx1 in zip(
                    range(0, len(volume[0]), batch_size),
                    range(batch_size, len(volume[0]) + batch_size + 1,
                          batch_size)):
                # Prepare data for joint evaluation and prediction.
                if evaluate:
                    batch = (volume[0][idx0:idx1], volume[1][idx0:idx1])
                    x, y, sample_weights = model._standardize_user_data(
                        batch[0], batch[1])
                    ins = x + y + sample_weights
                else:
                    batch = (volume[0][idx0:idx1], )
                    ins = _standardize_input_data(batch[0],
                                                  model._feed_input_names,
                                                  model._feed_input_shapes,
                                                  check_batch_axis=False,
                                                  exception_prefix='input')
                # Feed the test-phase flag when the learning phase is a
                # placeholder.
                if model.uses_learning_phase and \
                        not isinstance(K.learning_phase(), int):
                    ins += [0.]

                # Jointly evaluate and predict.
                outputs = predict_function(ins)
                if num_outputs == 1:
                    predictions = outputs[0:1]
                    if evaluate:
                        val_metrics = outputs[1:]
                elif num_outputs == 2:
                    predictions = outputs[0:2]
                    if evaluate:
                        val_metrics = outputs[2:]
                else:
                    raise ValueError("num_outputs must be 1 or 2")

                # Write predictions.
                # NOTE(review): `subgroup` is only assigned when
                # save_predictions is True, yet it is used unconditionally
                # here -- confirm against the original layout.
                predictions = np.concatenate(predictions, axis=1)
                subgroup['volume'][idx0:idx1] = predictions

                # Update metrics
                if evaluate:
                    val_logs = OrderedDict(
                        zip(model.metrics_names, val_metrics))
                    val_logs.update({
                        'batch': batch_num,
                        'size': len(batch[0])
                    })
                    val_callbacks.on_batch_end(batch_num, val_logs)

                batch_num += 1

    if evaluate:
        # Update metrics
        val_callbacks.on_epoch_end(0, val_logs)

        # Output metrics
        for m in val_logs:
            if m not in ['batch', 'size']:
                print("{}: {}".format(m, val_logs[m]))