def in_memory_train_eval(estimator: tf.estimator.Estimator, model: EstimatorConvModel):
    """Train `estimator` in-process, running periodic in-memory evaluations.

    Builds one `InMemoryEvaluatorHook` over the standard eval dataset and,
    when key exclusion is configured, a second hook over the full
    (unfiltered) dataset named 'full', then runs a single `train` call.

    :param estimator: the estimator to train
    :param model: provides `dataset_provider` with the input functions
    """
    dataset_provider = model.dataset_provider
    train_steps = config[consts.TRAIN_STEPS]
    eval_steps_interval = config[consts.EVAL_STEPS_INTERVAL]

    # Read the exclusion flag once; the original tested config twice.
    excluding_keys = config[consts.EXCLUDED_KEYS]
    eval_name = filenames.create_excluded_name_fragment() if excluding_keys else None

    hooks = [
        tf.contrib.estimator.InMemoryEvaluatorHook(
            estimator=estimator,
            input_fn=lambda: dataset_provider.eval_input_fn(),
            every_n_iter=eval_steps_interval,
            name=eval_name,
        )
    ]
    if excluding_keys:
        # When keys are excluded from the main eval set, also evaluate on the
        # unfiltered dataset so both curves can be compared.
        hooks.append(
            tf.contrib.estimator.InMemoryEvaluatorHook(
                estimator=estimator,
                input_fn=lambda: dataset_provider.eval_with_excludes_input_fn(),
                every_n_iter=eval_steps_interval,
                name='full',
            )
        )

    estimator.train(
        input_fn=lambda: dataset_provider.train_input_fn(),
        steps=train_steps,
        hooks=hooks,
    )
def train_and_test(estimator: tf.estimator.Estimator, train_input_fn, test_input_fn,
                   steps, steps_between_evals, eval_steps):
    """Alternate training and evaluation, printing eval metrics each round.

    Runs an initial evaluation, then trains in chunks of
    `steps_between_evals` steps, evaluating (for `eval_steps` steps) and
    printing the metrics after each chunk.

    :param estimator: the estimator to train and evaluate
    :param train_input_fn: input_fn for training data
    :param test_input_fn: input_fn for evaluation data
    :param steps: total number of training steps to run
    :param steps_between_evals: training steps between evaluations
    :param eval_steps: number of steps per evaluation pass
    """
    eval_results = estimator.evaluate(input_fn=test_input_fn, steps=eval_steps)
    print(eval_results)

    full_rounds, leftover = divmod(steps, steps_between_evals)
    for _ in range(full_rounds):
        estimator.train(input_fn=train_input_fn, steps=steps_between_evals)
        eval_results = estimator.evaluate(input_fn=test_input_fn, steps=eval_steps)
        print(eval_results)

    if leftover:
        # BUG FIX: the original integer-division loop silently dropped
        # `steps % steps_between_evals` steps, training fewer total steps
        # than requested. Run (and evaluate) the remainder here.
        estimator.train(input_fn=train_input_fn, steps=leftover)
        eval_results = estimator.evaluate(input_fn=test_input_fn, steps=eval_steps)
        print(eval_results)
def train(model: tf.estimator.Estimator, nb_epochs: int, train_data_path: str,
          val_data_path: str, batch_size: int = 32):
    """Train `model` for `nb_epochs`, recording train/val metrics per epoch.

    Evaluates once before training (epoch-0 baseline), then after every
    training epoch, on both the training and validation datasets.

    :param model: the estimator to train
    :param nb_epochs: number of training epochs
    :param train_data_path: path to the training TFRecords
    :param val_data_path: path to the validation TFRecords
    :param batch_size: training batch size
    :return: (train_epoch_history, validation_epoch_history) — lists of the
        metric dicts returned by `model.evaluate`, length nb_epochs + 1
    """
    def _evaluate(data_path):
        # Deterministic evaluation pass: no shuffling.
        return model.evaluate(input_fn=lambda: load_dataset(data_path, shuffle=False))

    # Baseline metrics before any training.
    train_epoch_history = [_evaluate(train_data_path)]
    validation_epoch_history = [_evaluate(val_data_path)]

    for epoch in range(nb_epochs):
        # BUG FIX: the original trained on the hard-coded path
        # 'data/train.tfrecords' instead of the `train_data_path` argument,
        # so evaluation and training could silently use different datasets.
        # (Also dropped the unused `model_spec` local.)
        model.train(
            input_fn=lambda: load_dataset(train_data_path, epochs=1,
                                          shuffle=True, batch_size=batch_size))
        train_epoch_history.append(_evaluate(train_data_path))
        validation_epoch_history.append(_evaluate(val_data_path))
        logging.info(f"EPOCH: {epoch}:\n"
                     f"\tval_loss: {validation_epoch_history[-1]['loss']}\n"
                     f"\ttrain_loss: {train_epoch_history[-1]['loss']}\n")
    return train_epoch_history, validation_epoch_history
def fit_model_on_fold(self, compiled_model: tf.estimator.Estimator, curr_fold_indices,
                      train_sequences, test_sequences):
    """
    trains compiled (but previously unfitted) model against given indices
    :param compiled_model: estimator to train, validate, and predict with
    :param curr_fold_indices: (train_indices, val_indices) pair for this CV fold
    :param train_sequences: array of input sequences, indexed by the fold indices
    :param test_sequences: array of input sequences for the held-out test set
    :return: (validation ROC-AUC score, test-set probability predictions)
    """
    def train_input_fn(features, labels, batch_size):
        # Shuffle and repeat indefinitely; the `steps` argument of
        # `estimator.train` bounds the run.
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
        return dataset.shuffle(10000).repeat().batch(batch_size)

    def eval_input_fn(features, labels, batch_size):
        """use for both validation and prediction"""
        features = dict(features)
        # Prediction passes labels=None; feed features alone in that case.
        inputs = features if labels is None else (features, labels)
        return tf.data.Dataset.from_tensor_slices(inputs).batch(batch_size)

    train_indices, val_indices = curr_fold_indices
    x_train = {'sequence': train_sequences[train_indices]}
    y_train = self.raw_train_df[self.target_cols].iloc[train_indices].values
    x_val = {'sequence': train_sequences[val_indices]}
    y_val = self.raw_train_df[self.target_cols].iloc[val_indices].values

    # steps = epochs * batches-per-epoch (integer division).
    compiled_model.train(
        input_fn=lambda: train_input_fn(x_train, y_train, self.batch_size),
        steps=self.epochs * len(train_indices) // self.batch_size,
    )

    # CONSISTENCY FIX: pass `input_fn` by keyword in both predict calls;
    # the first call originally passed it positionally. Also use distinct
    # names for the two closures instead of reusing one variable.
    val_input_fn = lambda: eval_input_fn(x_val, None, self.batch_size)
    val_predictions = compiled_model.predict(input_fn=val_input_fn)
    val_prob = np.array([x['probabilities'] for x in val_predictions])
    val_roc_auc_score = roc_auc_score(y_val, val_prob)
    print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

    x_test = {'sequence': test_sequences}
    test_input_fn = lambda: eval_input_fn(x_test, None, self.batch_size)
    test_predictions = compiled_model.predict(input_fn=test_input_fn)
    test_prob = np.array([x['probabilities'] for x in test_predictions])

    return val_roc_auc_score, test_prob