Example #1
def evaluate_fairness(data_split, model, rank_weights, labels, num_samples):
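    # Estimates an exposure-disparity (fairness) measure per query: expected document
    # exposure is approximated from Plackett-Luce samples of the score distribution and
    # compared pairwise against the relevance labels. Note that `result` accumulates the
    # mean *squared* pairwise differences and `squared_result` the mean *absolute* ones.
    # Assumes numpy as np and the repository's Plackett-Luce sampling utilities as pl.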
    cutoff = rank_weights.size
    scores = model(data_split.feature_matrix).numpy()[:, 0]

    result = 0.
    squared_result = 0.
    for qid in range(data_split.num_queries()):
        q_scores = data_split.query_values_from_vector(qid, scores)
        q_labels = data_split.query_values_from_vector(qid, labels)
        if np.sum(q_labels) > 0 and q_labels.size > 1:
            sampled_rankings = pl.gumbel_sample_rankings(q_scores,
                                                         num_samples,
                                                         cutoff=cutoff)[0]

            q_n_docs = q_labels.shape[0]
            q_cutoff = min(cutoff, q_n_docs)
            doc_exposure = np.zeros(q_n_docs, dtype=np.float64)
            np.add.at(doc_exposure, sampled_rankings, rank_weights[:q_cutoff])
            doc_exposure /= num_samples

            swap_reward = doc_exposure[:, None] * q_labels[None, :]

            q_result = np.mean((swap_reward - swap_reward.T)**2.)
            q_result *= q_n_docs / (q_n_docs - 1.)

            q_squared = np.mean(np.abs(swap_reward - swap_reward.T))
            q_squared *= q_n_docs / (q_n_docs - 1.)

            result += q_result
            squared_result += q_squared
    result /= data_split.num_queries()
    squared_result /= data_split.num_queries()
    return result, squared_result
Example #2
def evaluate_expected(data_split, model, rank_weights, labels, ideal_metrics,
                      num_samples):
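    # Monte-Carlo estimate of the expected ranking metric under the current model:
    # rankings are sampled from the Plackett-Luce distribution over the model scores
    # and the rank-weighted labels are averaged per query. Also returns the metric
    # normalized by the mean ideal metric and a per-query normalized variant.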
    cutoff = rank_weights.size
    scores = model(data_split.feature_matrix).numpy()[:, 0]

    result = 0.
    query_normalized_result = 0.
    for qid in range(data_split.num_queries()):
        q_scores = data_split.query_values_from_vector(qid, scores)
        q_labels = data_split.query_values_from_vector(qid, labels)
        sampled_rankings = pl.gumbel_sample_rankings(q_scores,
                                                     num_samples,
                                                     cutoff=cutoff)[0]
        q_result = np.mean(
            np.sum(rank_weights[None, :sampled_rankings.shape[1]] *
                   q_labels[sampled_rankings],
                   axis=1),
            axis=0)
        result += q_result
        if ideal_metrics[qid] != 0:
            query_normalized_result += q_result / ideal_metrics[qid]
    result /= data_split.num_queries()
    query_normalized_result /= data_split.num_queries()
    normalized_result = result / np.mean(ideal_metrics)
    return result, normalized_result, query_normalized_result
Example #3
def lambdarank(rank_weights, labels, scores, n_samples):
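    # LambdaRank-style gradient estimate: for every document pair where one label is
    # higher than the other, the expected difference in rank weights (estimated from
    # sampled rankings) is scaled by the label difference and a sigmoid of the score
    # difference, then accumulated into per-document weights.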
    n_docs = scores.shape[0]
    cutoff = min(rank_weights.shape[0], n_docs)
    (_, sampled_inv_rankings, _, _,
     gumbel_scores) = pl.gumbel_sample_rankings(scores,
                                                n_samples,
                                                cutoff=None,
                                                inverted=True,
                                                return_gumbel=True)
    (greater_i,
     lesser_i) = np.where(np.greater(labels[:, None], labels[None, :]))
    delta_rank = np.abs(sampled_inv_rankings[:, greater_i] -
                        sampled_inv_rankings[:, lesser_i])
    if n_docs > cutoff:
        safe_rank_weights = np.zeros(n_docs)
        safe_rank_weights[:cutoff] = rank_weights
    else:
        safe_rank_weights = rank_weights

    delta_weight = np.mean(safe_rank_weights[delta_rank - 1] -
                           safe_rank_weights[delta_rank],
                           axis=0)
    pair_weight = delta_weight * (labels[greater_i] - labels[lesser_i])
    exp_score_diff = np.exp(
        np.minimum(scores[greater_i] - scores[lesser_i], 100))

    pair_deriv = pair_weight * exp_score_diff / (
        (exp_score_diff + 1.) * np.log(2.))

    doc_weights = np.zeros(n_docs, dtype=np.float64)
    np.add.at(doc_weights, greater_i, pair_deriv)
    np.add.at(doc_weights, lesser_i, -pair_deriv)

    return doc_weights
Example #4
def single_ranking_generation(qid,
                              data_split,
                              doc_weights,
                              alpha,
                              beta,
                              model=None,
                              all_policy_scores=None,
                              return_scores=False):
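    # Samples a single Plackett-Luce ranking for query `qid` (scored by the model or
    # taken from precomputed policy scores) and simulates clicks on it with the
    # alpha/beta click model.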
    assert model is not None or all_policy_scores is not None

    n_docs = data_split.query_size(qid)
    cutoff = min(alpha.shape[0], n_docs)

    if all_policy_scores is None:
        q_feat = data_split.query_feat(qid)
        policy_scores = model(q_feat)[:, 0].numpy()
    else:
        policy_scores = data_split.query_values_from_vector(
            qid, all_policy_scores)

    rankings = pl.gumbel_sample_rankings(policy_scores, 1, cutoff)[0]

    q_doc_weights = data_split.query_values_from_vector(qid, doc_weights)
    clicks = generate_clicks(rankings, q_doc_weights, alpha, beta)

    if return_scores:
        return rankings[0, :], clicks[0, :], policy_scores
    else:
        return rankings[0, :], clicks[0, :]
Example #5
def PL_rank_2(rank_weights, labels, scores, n_samples=None, sampled_rankings=None):
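  # PL-Rank-2 gradient estimator: approximates the gradient of the expected
  # rank-weighted metric w.r.t. the document scores from sampled Plackett-Luce
  # rankings, combining the reward accumulated below each placed document with
  # the per-rank placement probabilities (computed by masking placed documents).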
  n_docs = labels.shape[0]
  result = np.zeros(n_docs, dtype=np.float64)
  cutoff = min(rank_weights.shape[0], n_docs)

  assert n_samples is not None or sampled_rankings is not None
  if sampled_rankings is None:
    sampled_rankings = pl.gumbel_sample_rankings(
                                    scores,
                                    n_samples,
                                    cutoff=cutoff)[0]
  else:
    n_samples = sampled_rankings.shape[0]

  srange = np.arange(n_samples)
  crange = np.arange(cutoff)

  relevant_docs = np.where(np.not_equal(labels, 0))[0]
  n_relevant_docs = relevant_docs.size

  weighted_labels = labels[sampled_rankings]*rank_weights[None,:cutoff]
  cumsum_labels = np.cumsum(weighted_labels[:,::-1], axis=1)[:,::-1]

  np.add.at(result, sampled_rankings[:,:-1], cumsum_labels[:,1:])
  result /= n_samples

  ninf_mask = np.zeros((n_samples, cutoff-1, n_docs), dtype=np.float64)
  ninf_mask[srange[:,None],
            crange[None,:-1],
            sampled_rankings[:,:-1]] = -np.inf
  ninf_mask[:,:] = np.cumsum(ninf_mask, axis=1)

  tiled_scores = np.tile(scores[None,None,:], (n_samples, cutoff, 1))
  tiled_scores[:,1:,:] += ninf_mask
  max_per_rank = np.max(tiled_scores, axis=2)
  tiled_scores -= max_per_rank[:,:,None]

  denom_per_rank = np.log(np.sum(np.exp(tiled_scores), axis=2))
  prob_per_rank = np.exp(tiled_scores - denom_per_rank[:,:,None])

  result -= np.mean(
    np.sum(prob_per_rank*cumsum_labels[:,:,None], axis=1)
    , axis=0, dtype=np.float64)
  result[relevant_docs] += np.mean(
    np.sum(prob_per_rank[:,:,relevant_docs]*(
                          rank_weights[None,:cutoff,None]
                          *labels[None,None,relevant_docs]), axis=1)
    , axis=0, dtype=np.float64)

  return result
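A minimal usage sketch for PL_rank_2 using only NumPy; the Gumbel-trick sampling below is a stand-in for pl.gumbel_sample_rankings, and the toy labels, scores, and DCG-style rank_weights are illustrative assumptions:

import numpy as np

rng = np.random.default_rng(0)
n_docs, cutoff, n_samples = 6, 5, 128
labels = np.array([2., 0., 1., 0., 3., 0.])           # toy graded relevance labels
scores = rng.normal(size=n_docs)                       # toy model scores
rank_weights = 1. / np.log2(np.arange(cutoff) + 2.)    # DCG-style rank discounts

# Gumbel-trick sampling of Plackett-Luce rankings (stand-in for pl.gumbel_sample_rankings)
gumbel = rng.gumbel(size=(n_samples, n_docs))
sampled_rankings = np.argsort(-(scores[None, :] + gumbel), axis=1)[:, :cutoff]

doc_gradient = PL_rank_2(rank_weights, labels, scores, sampled_rankings=sampled_rankings)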
Example #6
def single_query_generation(qid,
                            data_split,
                            n_samples,
                            doc_weights,
                            alpha,
                            beta,
                            model=None,
                            all_policy_scores=None,
                            return_display=False,
                            store_per_rank=False):
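    # Samples `n_samples` Plackett-Luce rankings for query `qid`, simulates clicks with
    # the alpha/beta click model, and aggregates click counts per document (optionally
    # per rank via `store_per_rank`, and optionally also display counts via
    # `return_display`).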
    assert model is not None or all_policy_scores is not None

    n_docs = data_split.query_size(qid)
    cutoff = min(alpha.shape[0], n_docs)

    if all_policy_scores is None:
        q_feat = data_split.query_feat(qid)
        policy_scores = model(q_feat)[:, 0].numpy()
    else:
        policy_scores = data_split.query_values_from_vector(
            qid, all_policy_scores)

    rankings = pl.gumbel_sample_rankings(policy_scores, n_samples, cutoff)[0]

    q_doc_weights = data_split.query_values_from_vector(qid, doc_weights)
    clicks = generate_clicks(rankings, q_doc_weights, alpha, beta)

    if store_per_rank:
        store_cutoff = alpha.shape[0]
        clicks_per_doc = np.zeros((n_docs, store_cutoff), dtype=np.int32)
        ind_tile = np.tile(np.arange(cutoff)[None, :], (n_samples, 1))
        np.add.at(clicks_per_doc, (rankings[clicks], ind_tile[clicks]), 1)
    else:
        clicks_per_doc = np.zeros(n_docs, dtype=np.int32)
        np.add.at(clicks_per_doc, rankings[clicks], 1)

    if return_display:
        if store_per_rank:
            displays_per_doc = np.zeros((n_docs, store_cutoff), dtype=np.int32)
            np.add.at(displays_per_doc, (rankings, ind_tile), 1)
        else:
            displays_per_doc = np.zeros(n_docs, dtype=np.int32)
            np.add.at(displays_per_doc, rankings, 1)
    else:
        displays_per_doc = None

    return clicks_per_doc, displays_per_doc
Example #7
def placement_policy_gradient(rank_weights,
                              labels,
                              scores,
                              n_samples=None,
                              sampled_rankings=None):
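    # REINFORCE-style policy-gradient loss: the log-probability of each sampled
    # document placement is weighted by the cumulative rank-weighted reward from that
    # rank onwards, averaged over samples; the estimate is negated so it can be
    # minimized with a TensorFlow optimizer. `scores` is expected as an (n_docs, 1)
    # TensorFlow tensor.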
    n_docs = labels.shape[0]
    result = np.zeros(n_docs, dtype=np.float64)
    cutoff = min(rank_weights.shape[0], n_docs)

    np_scores = scores.numpy()[:, 0]
    assert n_samples is not None or sampled_rankings is not None
    if sampled_rankings is None:
        sampled_rankings = pl.gumbel_sample_rankings(np_scores,
                                                     n_samples,
                                                     cutoff=cutoff)[0]
    else:
        n_samples = sampled_rankings.shape[0]

    srange = np.arange(n_samples)
    crange = np.arange(cutoff)

    ninf_mask = np.zeros((n_samples, cutoff, n_docs), dtype=bool)
    ninf_mask[srange[:, None], crange[None, 1:],
              sampled_rankings[:, :-1]] = True
    ninf_mask[:, :] = np.cumsum(ninf_mask, axis=1)

    sampled_scores = tf.gather(scores, sampled_rankings)[:, :, 0]
    tiled_scores = tf.tile(scores[None, None, :, 0], (n_samples, cutoff, 1))
    tiled_scores = tf.where(ninf_mask, -np.inf, tiled_scores)
    max_per_rank = np.max(tiled_scores, axis=2)
    tiled_scores -= max_per_rank[:, :, None]

    sample_denom = tf.reduce_logsumexp(tiled_scores, axis=2)
    sample_log_prob = sampled_scores - sample_denom

    rewards = rank_weights[None, :cutoff] * labels[sampled_rankings]
    cum_rewards = tf.cumsum(rewards, axis=1, reverse=True)

    result = tf.reduce_sum(
        tf.reduce_mean(sample_log_prob * cum_rewards, axis=0))
    return -result
Example #8
def prob_per_rank_query(n_samples, cutoff, policy_scores):
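    # Estimates the probability of each document appearing at each rank under the
    # Plackett-Luce model: empirical frequencies from the sampled rankings for all but
    # the last rank, and the exact softmax placement probability (averaged over the
    # sampled prefixes) for the last rank.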
    n_docs = policy_scores.size
    q_cutoff = min(cutoff, policy_scores.size)
    if n_docs <= 1:
        return np.ones((n_docs, q_cutoff))
    rankings = pl.gumbel_sample_rankings(policy_scores, n_samples, q_cutoff)[0]
    freq_per_rank = np.zeros((n_docs, q_cutoff), dtype=np.int64)
    np.add.at(freq_per_rank,
              (rankings[:, :-1], np.arange(q_cutoff - 1)[None, :]), 1)
    prob_per_rank = freq_per_rank.astype(np.float64) / n_samples

    scores_per_ranking = np.tile(policy_scores,
                                 (n_samples, 1)).astype(np.float64)
    scores_per_ranking[np.arange(n_samples)[:, None],
                       rankings[:, :-1]] = -np.inf
    scores_per_ranking -= np.amax(scores_per_ranking, axis=1)[:, None]
    denom = np.log(np.sum(np.exp(scores_per_ranking), axis=1))[:, None]

    prob_per_rank[:, -1] = np.mean(np.exp(scores_per_ranking - denom), axis=0)

    return prob_per_rank
Example #9
    q_labels = data.train.query_values_from_vector(qid, train_labels)
    q_feat = data.train.query_feat(qid)

    if np.sum(q_labels) > 0 and q_labels.size > 1:
      q_n_docs = q_labels.shape[0]
      q_cutoff = min(cutoff, q_n_docs)
      q_metric_weights = metric_weights[:q_cutoff] #/q_ideal_metric
      with tf.GradientTape() as tape:
        q_tf_scores = model(q_feat)

        q_np_scores = q_tf_scores.numpy()[:,0]
        if args.loss == 'lambdaloss':
          (sampled_rankings, sampled_inv_rankings, _, _,
          gumbel_scores) = pl.gumbel_sample_rankings(
                                  q_np_scores,
                                  num_exposure_samples,
                                  cutoff=None,
                                  inverted=True,
                                  return_gumbel=True)
          sampled_rankings = sampled_rankings[:,:cutoff]
        else:
          sampled_rankings = pl.gumbel_sample_rankings(
                                        q_np_scores,
                                        num_exposure_samples,
                                        cutoff=q_cutoff)[0]

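        # Exposure at ranks 2 and below is estimated from the sampled rankings; the
        # placement probability for the first rank is computed exactly via a softmax
        # over the scores (first_prob).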
        doc_exposure = np.zeros(q_n_docs, dtype=np.float64)
        np.add.at(doc_exposure, sampled_rankings[:,1:], q_metric_weights[1:])
        doc_exposure /= num_exposure_samples

        max_score = np.amax(q_np_scores)
        first_prob = np.exp(q_np_scores-max_score)/np.sum(np.exp(q_np_scores-max_score))
Example #10
def optimize_policy(model,
                    optimizer,
                    data_train,
                    train_doc_weights,
                    train_alpha,
                    train_beta,
                    data_vali,
                    vali_doc_weights,
                    vali_alpha,
                    vali_beta,
                    n_grad_samples=100,
                    n_eval_samples=100,
                    max_epochs=50,
                    early_stop_diff=0.001,
                    early_stop_per_epochs=3,
                    print_updates=False):
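    # Optimizes the ranking model with per-query policy-gradient updates: only queries
    # whose documents carry non-zero weight are included, Plackett-Luce rankings are
    # sampled for each gradient step, and validation metrics are evaluated every
    # `early_stop_per_epochs` epochs for early stopping and best-weight tracking.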

    early_stop_per_epochs = min(early_stop_per_epochs, max_epochs)

    stacked_alphas = np.stack([train_alpha, vali_alpha], axis=-1)
    stacked_betas = np.stack([train_beta, vali_beta], axis=-1)

    cutoff = stacked_alphas.shape[0]

    policy_vali_scores = model(data_vali.feature_matrix)[:, 0].numpy()
    metrics = pl.datasplit_metrics(
        data_vali,
        policy_vali_scores,
        stacked_alphas,
        stacked_betas,
        vali_doc_weights,
        n_samples=n_eval_samples,
    )
    if print_updates:
        print('epoch %d: train %0.04f vali %0.04f' %
              (0, metrics[0], metrics[1]))
    first_metric_value = metrics[1]
    last_metric_value = metrics[1]

    best_metric_value = metrics[1]
    best_weights = model.get_weights()

    cum_doc_weights = np.cumsum(np.abs(train_doc_weights))
    start_weights = cum_doc_weights[data_train.doclist_ranges[:-1]]
    end_weights = cum_doc_weights[data_train.doclist_ranges[1:] - 1]
    qid_included = np.where(np.not_equal(start_weights, end_weights))[0]
    qid_included = np.random.permutation(qid_included)

    start_time = time.time()
    n_queries = qid_included.shape[0]
    for i in range(n_queries * max_epochs):
        qid = qid_included[i % n_queries]

        q_doc_weights = data_train.query_values_from_vector(
            qid, train_doc_weights)
        q_feat = data_train.query_feat(qid)
        q_cutoff = min(cutoff, data_train.query_size(qid))

        # print(q_doc_weights)

        with tf.GradientTape() as tape:

            tf_scores = model(q_feat)[:, 0]
            scores = tf_scores.numpy()

            sampled_rankings = pl.gumbel_sample_rankings(
                scores,
                n_grad_samples,
                cutoff=q_cutoff,
            )[0]
            gradient = pl.fast_gradient_based_on_samples(sampled_rankings,
                                                         train_alpha,
                                                         q_doc_weights,
                                                         scores,
                                                         cutoff=q_cutoff)

            # hybrid_gradient = pl.hybrid_gradient_based_on_samples(
            #                   sampled_rankings,
            #                   train_alpha,
            #                   q_doc_weights,
            #                   scores,
            #                   cutoff=q_cutoff)

            loss = -tf.reduce_sum(tf_scores * gradient)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # reshuffle queries every epoch
        if (i + 1) % (n_queries) == 0:
            qid_included = np.random.permutation(qid_included)
        if (i + 1) % (n_queries * early_stop_per_epochs) == 0:
            epoch_i = (i + 1) // n_queries
            policy_vali_scores = model(data_vali.feature_matrix)[:, 0].numpy()
            metrics = pl.datasplit_metrics(
                data_vali,
                policy_vali_scores,
                stacked_alphas,
                stacked_betas,
                vali_doc_weights,
                n_samples=n_eval_samples,
            )
            abs_improvement = metrics[1] - last_metric_value
            if print_updates:
                improvement = metrics[1] / last_metric_value - 1.
                total_improvement = metrics[1] / first_metric_value - 1.
                average_time = (time.time() - start_time) / (i +
                                                             1.) * n_queries
                print('epoch %d: '
                      'train %0.04f '
                      'vali %0.04f '
                      'epoch-time %0.04f '
                      'abs-improvement %0.05f '
                      'improvement %0.05f '
                      'total-improvement %0.05f ' %
                      (epoch_i, metrics[0], metrics[1], average_time,
                       abs_improvement, improvement, total_improvement))
            last_metric_value = metrics[1]
            if best_metric_value < metrics[1]:
                best_metric_value = metrics[1]
                best_weights = model.get_weights()
            if abs_improvement < early_stop_diff:
                break

    model.set_weights(best_weights)
    return model, last_metric_value