from typing import Dict, Tuple

import numpy as np
import tensorflow as tf


def trainer(
    train_set: Tuple[np.ndarray, np.ndarray],
    test_set: Tuple[np.ndarray, np.ndarray],
    size_dict: Dict[int, int],
    model: str = "ResNet50",
    batch_size: int = 500,
    num_epochs: int = 10,
    learning_rate: float = 0.001,
    weight_decay: float = 0,
    dropout: float = 0,
) -> float:
    """
    Get the best test accuracy reached while training for `num_epochs` epochs.

    `train_set` and `test_set` are (images, labels) tuples.
    """
    # create dataloaders
    train_loader = tf.data.Dataset.from_tensor_slices(train_set)
    test_loader = tf.data.Dataset.from_tensor_slices(test_set)
    train_loader = train_loader.shuffle(
        buffer_size=train_set[1].shape[0], reshuffle_each_iteration=True
    ).batch(batch_size)
    test_loader = test_loader.shuffle(
        buffer_size=test_set[1].shape[0], reshuffle_each_iteration=False
    ).batch(batch_size)

    # set up model and optimizer
    num_classes = len(size_dict)
    if model == "ResNet50":
        model = ResNet50(num_classes, dropout)
    else:
        raise ValueError("unsupported model: %s" % model)
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    # class encoding is necessary so that the dimension of the one-hot
    # encoding is identical to the number of classes
    class_encoding = {class_id: i for i, (class_id, _) in enumerate(size_dict.items())}

    # start training
    best_acc = 0
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            labels = np.vectorize(lambda id: class_encoding[id])(labels)
            with tf.GradientTape() as g:
                # forward pass, with the L2 penalty inside the tape so that
                # weight decay contributes to the gradients
                preds = model(images, training=True)
                loss = cross_entropy_loss(preds, labels)
                l2_loss = weight_decay * tf.add_n(
                    [tf.nn.l2_loss(v) for v in model.trainable_variables]
                )
                loss += l2_loss
            # backward pass
            grad = g.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grad, model.trainable_variables))

        # test after each epoch
        accuracies = []
        for images, labels in test_loader:
            labels = np.vectorize(lambda id: class_encoding[id])(labels)
            preds = model(images, training=False)
            batch_acc = accuracy(preds, labels)
            accuracies.append(batch_acc)
        epoch_acc = sum(accuracies) / len(accuracies)
        best_acc = max(best_acc, epoch_acc)

    return float(round(best_acc, 4))
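# `trainer` above relies on ResNet50, cross_entropy_loss and accuracy being
# defined elsewhere in the project. Below is a minimal sketch of the accuracy
# helper it assumes, a per-batch top-1 accuracy over (batch, num_classes)
# scores and integer class labels; the project's own version may differ.
def accuracy(preds: tf.Tensor, labels: np.ndarray) -> float:
    # compare the arg-max class of each prediction row with its label
    correct = tf.equal(tf.argmax(preds, axis=1), tf.cast(labels, tf.int64))
    return float(tf.reduce_mean(tf.cast(correct, tf.float32)))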
def model_predict(model, eval_input_fn, epoch):
    """Run prediction on the evaluation set and display the evaluation metrics."""
    prediction_result = model.predict(eval_input_fn)
    click_sum = 0.0
    predictions = []
    user_id_list = []
    labels = []
    num_samples = FLAGS.batch_size * FLAGS.predict_steps
    num_pre_samples = 0
    print(num_samples)
    for pred_dict in prediction_result:
        user_id = pred_dict['user_id'][0]
        p = pred_dict['probabilities'][0]
        label = float(pred_dict['label'][0])
        click_sum += p
        predictions.append(p)
        user_id_list.append(user_id)
        labels.append(label)
        if p >= 0.5:
            num_pre_samples += 1
        if len(predictions) % max(num_samples // 10, 1) == 0:
            tf.logging.info(
                'predict at step %d/%d',
                int(float(len(predictions)) / num_samples * FLAGS.predict_steps),
                FLAGS.predict_steps)
        if len(predictions) >= num_samples:
            break

    num_samples = len(predictions)
    print('the number of predicted positive samples is: ' + str(num_pre_samples))

    # Display evaluation metrics
    label_mean = sum(labels) / num_samples
    prediction_mean = sum(predictions) / num_samples
    loss = sum(cross_entropy_loss(
        labels, predictions)) / num_samples * FLAGS.batch_size
    auc = roc_auc_score(labels, predictions)
    group_auc = cal_group_auc(labels, predictions, user_id_list)
    predict_diff = np.array(predictions) - prediction_mean
    predict_diff_square_sum = sum(np.square(predict_diff))
    s_deviation = np.sqrt(predict_diff_square_sum / num_samples)
    c_deviation = s_deviation / prediction_mean

    # with 0/1 labels, p * label >= 0.5 selects positives predicted positive,
    # and p * (1 - label) >= 0.5 selects negatives predicted positive
    true_positive_samples = (np.array(predictions) *
                             np.array(labels) >= 0.5).tolist().count(True)
    false_positive_samples = (np.array(predictions) *
                              (1 - np.array(labels)) >= 0.5).tolist().count(True)
    print(true_positive_samples)
    print(false_positive_samples)
    predicted_positives = true_positive_samples + false_positive_samples
    precision = (float(true_positive_samples) / predicted_positives
                 if predicted_positives > 0 else 0.0)
    # false negatives are positives predicted negative; p * label < 0.5 would
    # also count every negative sample, which inflates the denominator of recall
    false_negative_samples = ((np.array(predictions) < 0.5) &
                              (np.array(labels) == 1)).tolist().count(True)
    recall = float(true_positive_samples) / (true_positive_samples +
                                             false_negative_samples)
    print(false_negative_samples)

    tf.logging.info('Results at epoch %d/%d', epoch + 1, FLAGS.num_epochs)
    tf.logging.info('-' * 60)
    tf.logging.info('label/mean: %s' % label_mean)
    tf.logging.info('predictions/mean: %s' % prediction_mean)
    tf.logging.info('average loss per batch: %s' % loss)
    tf.logging.info('standard deviation: %s' % s_deviation)
    tf.logging.info('coefficient of variation: %s' % c_deviation)
    tf.logging.info('precision: %s' % precision)
    tf.logging.info('recall: %s' % recall)
    tf.logging.info('auc: %s' % auc)
    tf.logging.info('group auc: %s' % group_auc)
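# model_predict above calls cal_group_auc, which is defined elsewhere in the
# project. Below is a minimal sketch of the usual definition, an
# impression-weighted average of per-user AUC that skips users whose labels
# are all one class (where AUC is undefined); the project's version may
# weight or filter differently.
from collections import defaultdict

from sklearn.metrics import roc_auc_score


def cal_group_auc(labels, predictions, user_id_list):
    """Impression-weighted mean of per-user ROC AUC."""
    # group (label, prediction) pairs by user id
    groups = defaultdict(lambda: ([], []))
    for label, pred, user_id in zip(labels, predictions, user_id_list):
        groups[user_id][0].append(label)
        groups[user_id][1].append(pred)
    total_auc = 0.0
    total_impressions = 0
    for user_labels, user_preds in groups.values():
        # a user with only one class present has no defined AUC; skip it
        if min(user_labels) == max(user_labels):
            continue
        total_auc += len(user_labels) * roc_auc_score(user_labels, user_preds)
        total_impressions += len(user_labels)
    return total_auc / total_impressions if total_impressions else 0.0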
def main(unused_argv):
    train_files = []
    eval_files = []
    if isinstance(FLAGS.train_data_dir, str):
        train_files = list_hdfs_dir(FLAGS.train_data_dir)
    if isinstance(FLAGS.eval_data_dir, str):
        eval_files = list_hdfs_dir(FLAGS.eval_data_dir)
    random.shuffle(train_files)

    feature_columns = build_model_columns()

    session_config = tf.ConfigProto(
        device_count={'GPU': 1, 'CPU': 10},
        inter_op_parallelism_threads=10,
        intra_op_parallelism_threads=10
        # log_device_placement=True
    )
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.32
    run_config = tf.estimator.RunConfig().replace(
        model_dir=FLAGS.model_dir,
        session_config=session_config,
        log_step_count_steps=1000,
        save_summary_steps=20000,
        save_checkpoints_secs=1000)
    model = tf.estimator.Estimator(
        model_fn=dfm_model_fn,
        params={
            'feature_columns': feature_columns,
            'hidden_units': FLAGS.hidden_units.split(','),
            'learning_rate': FLAGS.learning_rate,
            'use_fm': FLAGS.use_fm
        },
        config=run_config)

    train_input_fn = lambda: feature_input_fn(train_files, 1, True, FLAGS.batch_size)
    # do not shuffle for evaluation
    eval_input_fn = lambda: feature_input_fn(eval_files, 1, False, FLAGS.batch_size)

    for epoch in range(FLAGS.num_epochs):
        if not FLAGS.evaluate_only:
            model.train(train_input_fn)
        print("*" * 100)
        # the per-epoch evaluation logic is identical to model_predict, so
        # call it instead of duplicating it inline
        model_predict(model, eval_input_fn, epoch)

    # Export the model
    if FLAGS.export_dir is not None:
        export_model(model, FLAGS.export_dir, feature_columns)
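# export_model above is defined elsewhere in the project. Below is a minimal
# sketch of a typical Estimator export, assuming the serving signature should
# parse serialized tf.Example protos with the same feature columns used for
# training; the project's helper may attach a different input signature.
def export_model(model, export_dir, feature_columns):
    # derive a parsing spec from the training-time feature columns
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = (
        tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))
    # writes a timestamped SavedModel directory under export_dir
    model.export_savedmodel(export_dir, serving_input_receiver_fn)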