def logistic_regression(
    dataset_fn,
    name='logistic_regression',
):
  """Bayesian logistic regression with a Gaussian prior.

  Args:
    dataset_fn: A function to create a classification data set. The dataset
      must have binary labels.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
  with tf.name_scope(name) as name:
    dataset = dataset_fn()

    num_train_points = dataset.train_features.shape[0]
    num_test_points = dataset.test_features.shape[0]
    have_test = num_test_points > 0
    # Add bias.
    train_features = tf.concat(
        [dataset.train_features,
         tf.ones([num_train_points, 1])], axis=-1)
    train_labels = tf.convert_to_tensor(dataset.train_labels)
    test_features = tf.concat(
        [dataset.test_features,
         tf.ones([num_test_points, 1])], axis=-1)
    test_labels = tf.convert_to_tensor(dataset.test_labels)
    num_features = int(train_features.shape[1])

    root = tfd.JointDistributionCoroutine.Root
    zero = tf.zeros(num_features)
    one = tf.ones(num_features)

    def model_fn(features):
      weights = yield root(tfd.Independent(tfd.Normal(zero, one), 1))
      logits = tf.einsum('nd,...d->...n', features, weights)
      yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

    train_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, features=train_features))
    test_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, features=test_features))

    dist = joint_distribution_posterior.JointDistributionPosterior(
        train_joint_dist, (None, train_labels))

    expectations = {
        'params':
            target_spec.expectation(
                fn=lambda params: params[0],
                human_name='Parameters',
            )
    }
    if have_test:
      expectations['test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .log_prob(test_labels)),
          human_name='Test NLL',
      )
      expectations['per_example_test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .distribution.log_prob(test_labels)),
          human_name='Per-example Test NLL',
      )

    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=(tfb.Identity(),),
        expectations=expectations,
        code_name='{}_{}'.format(dataset.code_name, name),
        human_name='{} Logistic Regression'.format(dataset.human_name),
    )
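
# The sketch below is illustrative only and not part of the original module:
# it shows the attributes that `logistic_regression` reads from the object
# returned by `dataset_fn()`. `_ToyClassificationDataset` and
# `_toy_logistic_regression_target` are hypothetical names introduced here
# (real dataset constructors live elsewhere in the package); it assumes
# `numpy` is imported as `np`, as in the surrounding code.


class _ToyClassificationDataset(object):
  """Tiny synthetic binary-classification dataset (illustration only)."""

  def __init__(self):
    rng = np.random.RandomState(0)
    # Float32 features; `logistic_regression` appends the bias column itself.
    self.train_features = rng.randn(50, 3).astype(np.float32)
    self.train_labels = rng.randint(0, 2, size=50).astype(np.int32)
    self.test_features = rng.randn(10, 3).astype(np.float32)
    self.test_labels = rng.randint(0, 2, size=10).astype(np.int32)
    self.code_name = 'toy_classification'
    self.human_name = 'Toy classification'


def _toy_logistic_regression_target():
  """Builds the logistic regression `TargetDensity` on the toy dataset."""
  # Calling the class constructs a fresh dataset, so it can serve as
  # `dataset_fn` directly.
  return logistic_regression(dataset_fn=_ToyClassificationDataset)
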
def item_response_theory(
    dataset_fn,
    name='item_response_theory',
):
  """One-parameter logistic item-response theory (IRT) model.

  Args:
    dataset_fn: A function to create an IRT data set.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
  with tf.name_scope(name) as name:
    dataset = dataset_fn()
    have_test = dataset.test_student_ids.shape[0] > 0

    num_students = dataset.train_student_ids.max()
    num_questions = dataset.train_question_ids.max()
    if have_test:
      num_students = max(num_students, dataset.test_student_ids.max())
      num_questions = max(num_questions, dataset.test_question_ids.max())

    # TODO(siege): Make it an option to use a sparse encoding; the choice
    # clearly depends on the dataset sparsity.
    def make_dense_encoding(student_ids, question_ids, correct):
      dense_y = np.zeros([num_students, num_questions], np.float32)
      y_mask = np.zeros_like(dense_y)
      dense_y[student_ids - 1, question_ids - 1] = correct
      y_mask[student_ids - 1, question_ids - 1] = 1.
      return dense_y, y_mask

    train_dense_y, train_y_mask = make_dense_encoding(
        dataset.train_student_ids,
        dataset.train_question_ids,
        dataset.train_correct,
    )
    test_dense_y, test_y_mask = make_dense_encoding(
        dataset.test_student_ids,
        dataset.test_question_ids,
        dataset.test_correct,
    )

    root = tfd.JointDistributionCoroutine.Root

    def model_fn(dense_y, y_mask):
      """Model definition."""
      mean_student_ability = yield root(tfd.Normal(0.75, 1.))
      student_ability = yield root(
          tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[0]])), 1))
      question_difficulty = yield root(
          tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[1]])), 1))
      logits = (
          mean_student_ability[Ellipsis, tf.newaxis, tf.newaxis] +
          student_ability[Ellipsis, tf.newaxis] -
          question_difficulty[Ellipsis, tf.newaxis, :])
      # Unobserved (student, question) cells have `y_mask == 0`. Driving their
      # logits to a large negative value makes the corresponding Bernoulli
      # terms contribute (numerically) zero to the log-likelihood, since the
      # dense responses for those cells are also zero.
      masked_logits = logits * y_mask - 1e10 * (1 - y_mask)
      yield tfd.Independent(tfd.Bernoulli(masked_logits), 2)

    train_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, train_dense_y, train_y_mask))
    test_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, test_dense_y, test_y_mask))

    dist = joint_distribution_posterior.JointDistributionPosterior(
        train_joint_dist, (None, None, None, train_dense_y))

    expectations = {
        'params':
            target_spec.expectation(
                fn=lambda params: tf.concat(  # pylint: disable=g-long-lambda
                    (params[0][Ellipsis, tf.newaxis],) + params[1:], axis=-1),
                human_name='Parameters',
            )
    }
    if have_test:
      expectations['test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .log_prob(test_dense_y)),
          human_name='Test NLL',
      )

      def per_example_test_nll(params):
        """Computes per-example test NLL."""
        test_y_idx = np.stack(
            [dataset.test_student_ids - 1, dataset.test_question_ids - 1],
            axis=-1)
        dense_nll = (
            -test_joint_dist.sample_distributions(
                value=params)[0][-1].distribution.log_prob(test_dense_y))
        vectorized_dense_nll = tf.reshape(
            dense_nll, [-1, num_students, num_questions])
        # TODO(siege): Avoid using vmap here.
        log_prob_y = tf.vectorized_map(
            lambda nll: tf.gather_nd(nll, test_y_idx), vectorized_dense_nll)
        return tf.reshape(
            log_prob_y, list(params[0].shape) + [test_y_idx.shape[0]])

      expectations['per_example_test_nll'] = target_spec.expectation(
          fn=per_example_test_nll,
          human_name='Per-example Test NLL',
      )

    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=(tfb.Identity(), tfb.Identity(),
                                tfb.Identity()),
        expectations=expectations,
        code_name='{}_{}'.format(dataset.code_name, name),
        human_name='{} 1PL Item-Response Theory'.format(dataset.human_name),
    )
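
# As above, a hedged illustration (not part of the original module) of the
# dataset attributes `item_response_theory` expects: 1-based integer
# `*_student_ids` / `*_question_ids` arrays and binary `*_correct` responses.
# `_ToyIRTDataset` and `_toy_item_response_theory_target` are hypothetical
# names introduced only for this sketch.


class _ToyIRTDataset(object):
  """Tiny synthetic IRT dataset (illustration only)."""

  def __init__(self):
    rng = np.random.RandomState(0)
    # 1-based IDs, as assumed by the dense encoding above.
    self.train_student_ids = rng.randint(1, 11, size=80)
    self.train_question_ids = rng.randint(1, 6, size=80)
    self.train_correct = rng.randint(0, 2, size=80).astype(np.float32)
    self.test_student_ids = rng.randint(1, 11, size=20)
    self.test_question_ids = rng.randint(1, 6, size=20)
    self.test_correct = rng.randint(0, 2, size=20).astype(np.float32)
    self.code_name = 'toy_irt'
    self.human_name = 'Toy IRT'


def _toy_item_response_theory_target():
  """Builds the 1PL IRT `TargetDensity` on the toy dataset."""
  # As with the logistic regression sketch, the class itself acts as
  # `dataset_fn`.
  return item_response_theory(dataset_fn=_ToyIRTDataset)
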