Example #1
def get_label_scores(params, replay):
    serp_len = params['serp_len']
    all_docs = replay['docs']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    n_docs = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']

    init_hidden = tf.zeros([n_docs, hidden_state_size])

    doc_col = mu._shared_doc_embeddings(all_docs,
                                        params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=False)

    doc_emb = mu._shared_doc_embeddings(batch_docs[:, :-1, :],
                                        params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=True)

    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params,
                           '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)

    pos = tf.cast(batch_pos, tf.float32)[:, :-1, None]
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])

    tiled_states = tf.tile(hidden_states[:, :, None, :], [1, 1, max_n_docs, 1])
    tiled_docs = tf.tile(doc_col[:, None, :, :], [1, serp_len - 1, 1, 1])

    score_input = tf.concat([tiled_states, tiled_docs], axis=3)
    return mu._create_subnetwork(score_input,
                                 params,
                                 subnetwork_name='/label/scoring',
                                 label_network=True,
                                 inference=True,
                                 reuse_variable_scope=False)
Example #2
def model(params, examples, labels, epsilon, stats_ops):
    serp_len = params['serp_len']
    doc_emb_size = params['doc_emb'][-1]
    hidden_state_size = params['hidden_state_size']
    docs = mu._get_doc_tensors(examples, params, 'main')

    result = {}

    n_docs = tf.shape(docs)[0]
    result['docs_per_query'] = n_docs

    doc_emb = mu._shared_doc_embeddings(docs,
                                        params,
                                        '/main/doc_emb',
                                        inference=True)

    hidden_init = tf.zeros([1, hidden_state_size])

    gru_fn = ru.get_gru_layer(params,
                              '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)

    policy = PositionEpsilonGreedy(serp_len, epsilon, n_docs)

    hidden_state = hidden_init
    serp = []
    serp_pos = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        hidden_states = tf.tile(hidden_state, [n_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=1)
        scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=i > 0,
                                       inference=True)
        for j in range(serp_len):
            mean_summary(params, 'policy_%d/pos_%d' % (i, j), scores[:, j],
                         stats_ops)

        action_ind, action_pos = policy.choose(scores)
        select_doc = tf.gather(docs, action_ind)

        serp.append(select_doc)
        serp_ind.append(action_ind)

        in_doc = tf.less(i, n_docs)
        serp_labels.append(
            tf.cond(
                in_doc,
                lambda: tf.gather(labels, action_ind, axis=0),
                lambda: tf.constant([[0]], dtype=tf.int64),
            ))
        serp_labels[-1].set_shape([1, 1])
        serp_pos.append(
            tf.cond(
                in_doc,
                lambda: tf.expand_dims(action_pos, axis=1),
                lambda: tf.constant([[serp_len]], dtype=tf.int32),
            ))
        serp_pos[-1].set_shape([1, 1])

        if i < serp_len - 1:
            a_pos = tf.expand_dims(tf.cast(action_pos, tf.float32), axis=1)
            a_doc = tf.gather(doc_emb, action_ind)
            gru_input = tf.concat([a_doc, a_pos], axis=1)
            hidden_state = gru_fn(hidden_state, gru_input)

    pos_order = tf.concat(serp_pos, axis=1)

    order_ind = tf.nn.top_k(-pos_order, serp_len)[1]
    unordered_labels = tf.squeeze(tf.concat(serp_labels, axis=1), axis=0)
    ordered_labels = tf.gather(unordered_labels, order_ind)

    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    result['serp_doc'] = tf.stack(serp_ind, axis=1)
    result['labels'] = ordered_labels
    result['select_order_labels'] = unordered_labels[None, :]
    result['pos_order'] = pos_order

    max_docs = params['max_docs']
    padding = tf.convert_to_tensor([[0, max_docs - n_docs], [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs, [1, max_docs, docs.shape[1].value])
    result['docs'] = padded_docs
    return result
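
# A small numpy sketch (added for illustration) of the reordering trick above:
# tf.nn.top_k on the negated positions yields indices that sort the picked
# documents back into display order, and those indices reorder the labels.
import numpy as np

pos_order = np.array([2, 0, 3, 1])          # display position chosen per pick
unordered_labels = np.array([9, 7, 5, 3])   # label of each pick, in pick order

order_ind = np.argsort(pos_order)           # equals tf.nn.top_k(-pos_order)[1]
ordered_labels = unordered_labels[order_ind]
print(ordered_labels)                       # [7 3 9 5], labels in display order
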
Example #3
def loss(params, replay, rewards):
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_in_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    n_docs = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']

    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

    doc_col = mu._shared_doc_embeddings(drop_col,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    init_hidden = tf.zeros([n_docs, hidden_state_size])

    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

    doc_emb = mu._shared_doc_embeddings(drop_docs,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params,
                           '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)

    pos = tf.expand_dims(tf.cast(batch_pos, tf.float32), axis=2)
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    score_states = tf.concat(
        [init_hidden[None, :, :], hidden_states[:-1, :, :]], axis=0)
    score_input = tf.concat([score_states, serp_emb], axis=2)
    pos_scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=True,
                                       inference=False)
    pos_scores = tf.transpose(pos_scores, [1, 0, 2])
    pos_filter = tf.one_hot(batch_pos, serp_len)

    scores = tf.reduce_sum(pos_scores * pos_filter, axis=2)
    unfiltered_mc_loss = (rewards - scores)**2

    max_filter = max_train_filter(params, hidden_states, serp_len, doc_col,
                                  replay['serp_ind'], batch_pos, docs_in_query,
                                  max_n_docs)

    label_scores = get_label_scores(params, replay)

    double_max_scores = tf.reduce_sum(max_filter * label_scores, axis=[2, 3])
    q_values = tf.concat([double_max_scores, rewards], axis=1)

    end_mask = tf.equal(docs_in_query - 1, tf.range(serp_len)[None, :])
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_values = tf.where(end_mask, reward_tile, q_values)

    unfiltered_dqn_loss = (scores - q_values)**2

    doc_denom = tf.cast(tf.reduce_sum(docs_in_query), tf.float32)
    mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)

    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_sum(filtered_mc_loss) / doc_denom

    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_sum(filtered_dqn_loss) / doc_denom

    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)

    tf.summary.scalar('DQN/double_max_scores',
                      tf.reduce_mean(double_max_scores))

    return mc_loss, dqn_loss
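
# A numpy sketch (illustration only) of the masking at the end of loss():
# positions past docs_in_query contribute nothing, and the sum is divided by
# the number of real documents (doc_denom) rather than batch * serp_len.
import numpy as np

serp_len = 4
docs_in_query = np.array([3, 2])                 # documents per query
per_pos_loss = np.ones((2, serp_len))            # stand-in squared errors

mask = np.arange(serp_len)[None, :] < docs_in_query[:, None]  # tf.sequence_mask
filtered = np.where(mask, per_pos_loss, 0.0)
loss = filtered.sum() / docs_in_query.sum()      # doc_denom normalisation
print(loss)                                      # 1.0: only real slots count
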
Example #4
def gru_model(params, examples, labels, epsilon):
  serp_len = params['serp_len']
  doc_emb_size = params['doc_emb'][-1]
  hidden_state_size = params['hidden_state_size']
  docs = examples['doc_tensors']
  batch_size = docs.shape[0].value
  batch_max_docs = tf.shape(docs)[1]
  docs_per_query = examples['n_docs']

  # if params['context_input']:
  #   to_shuffle = tf.concat([tf.cast(labels, tf.float32), docs], axis=1)
  #   shuffled = tf.random_shuffle(to_shuffle)
  #   labels = tf.cast(tf.slice(shuffled, [0, 0], [-1, 1]), tf.int64)
  #   docs = tf.slice(shuffled, [0, 1], [-1, -1])
  assert not params['context_input'], 'Context not supported for GRU.'

  result = {
    'docs_per_query': docs_per_query,
    }

  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)


  hidden_init = tf.zeros([batch_size, hidden_state_size])
  # if params['context_input']:
  #   context_gru_fn = ru.get_gru_layer(params, '/main/gru/context',
  #                                     label_network=False,
  #                                     inference=True,
  #                                     reuse_variable_scope=False)
  #   context_init = hidden_init
  #   context = tf.scan(context_gru_fn,
  #                     tf.expand_dims(doc_emb, axis=1), context_init)

  #   hidden_init = tf.gather(context, n_docs-1)

  gru_fn = ru.get_gru_layer(params, '/main/gru',
                            label_network=False,
                            inference=True,
                            reuse_variable_scope=False)

  policy = mu.EpsilonGreedy(epsilon, batch_size, batch_max_docs, docs_per_query)
  hidden_state = hidden_init
  serp = []
  serp_labels = []
  serp_ind = []
  for i in range(serp_len):
    hidden_states = tf.tile(hidden_state[:, None, :], [1, batch_max_docs, 1])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=i>0,
                                   inference=True)

    tf.summary.scalar('policy/scores/pos_%d' % i, tf.reduce_mean(scores))
    action = policy.choose(scores)
    serp_ind.append(action)

    nd_ind = tf.stack([tf.range(batch_size, dtype=tf.int64), action], axis=1)
    select_doc = tf.gather_nd(docs, nd_ind)
    select_labels = tf.gather_nd(labels, nd_ind)[:, None]
    
    serp_labels.append(tf.where(
      tf.less(i, docs_per_query),
      select_labels,
      tf.zeros([batch_size, 1], dtype=tf.int32),
      ))
    serp.append(select_doc)

    if i < serp_len-1:
      select_emb = tf.gather_nd(doc_emb, nd_ind)
      hidden_state = gru_fn(hidden_state, select_emb)


  result['serp'] = tf.stack(serp, axis=1)
  result['serp_ind'] = tf.stack(serp_ind, axis=1)
  result['labels'] = tf.concat(serp_labels, axis=1)
  tf.summary.histogram("label/output", result['labels'])

  max_docs = params['max_docs']
  padding = tf.convert_to_tensor([[0, 0], [0, max_docs-batch_max_docs], [0, 0]])
  padded_docs = tf.pad(docs, padding, "CONSTANT")
  padded_docs = tf.reshape(padded_docs, [batch_size, max_docs, docs.shape[2].value])
  result['docs'] = padded_docs

  return result
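
# A numpy sketch (illustration only) of the tf.gather_nd pattern in gru_model:
# stacking the batch range with the chosen action index selects exactly one
# document per query from the [batch, n_docs, dim] tensor.
import numpy as np

batch_size, n_docs, dim = 2, 3, 4
docs = np.arange(batch_size * n_docs * dim).reshape(batch_size, n_docs, dim)
action = np.array([2, 0])                    # one chosen doc index per query

# Same as tf.gather_nd(docs, tf.stack([tf.range(batch_size), action], axis=1)).
select_doc = docs[np.arange(batch_size), action]
assert select_doc.shape == (batch_size, dim)
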
Example #5
def calculate_gru_loss(params, replay, rewards):
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  docs_in_query = replay['docs_per_query']
  batch_docs = replay['serp']
  max_n_docs = params['max_docs']
  n_docs = tf.shape(batch_docs)[0]
  hidden_state_size = params['hidden_state_size']

  drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

  doc_col = mu._shared_doc_embeddings(drop_col, params,
                                      '/main/doc_emb',
                                      inference=False,
                                      reuse_variable_scope=True)
  # if params['context_input']:
  #   gru = ru.get_gru_layer(params, '/main/gru/collection/',
  #                                label_network=False,
  #                                inference=False,
  #                                reuse_variable_scope=False)
  #   input_col = tf.transpose(doc_col, [1, 0, 2])

  #   hidden_col = tf.scan(gru, input_col, init_hidden)

  #   hidden_col = tf.transpose(hidden_col, [1, 0, 2])

  #   indices = tf.stack([tf.range(n_docs), tf.squeeze(docs_in_query-1 ,axis=1)], axis=1)
  #   init_hidden = tf.gather_nd(hidden_col, indices)
  # else:
  init_hidden = tf.zeros([n_docs, hidden_state_size])

  drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

  doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                      '/main/doc_emb',
                                      label_network=False,
                                      inference=False,
                                      reuse_variable_scope=True)

  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/main/gru',
                         label_network=False,
                         inference=False,
                         reuse_variable_scope=True)

  hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
  hidden_states = tf.concat([init_hidden[None, :, :], hidden_states], axis=0)
  # Score each shown document against the hidden state that preceded it.
  score_input = tf.concat([hidden_states, serp_emb], axis=2)
  scores = mu._create_subnetwork(score_input,
                                 params,
                                 subnetwork_name='/main/scoring',
                                 label_network=False,
                                 reuse_variable_scope=True,
                                 inference=False)


  scores = tf.squeeze(tf.transpose(scores, [1, 0, 2]), axis=2)
  unfiltered_mc_loss = (rewards - scores)**2.

  max_filter = max_train_filter(gru, hidden_states, serp_len,
                                doc_col, replay['serp_ind'],
                                docs_in_query, max_n_docs)

  label_scores = get_label_scores(params, replay)

  double_max_scores = tf.reduce_sum(max_filter*label_scores, axis=2)
  q_values = tf.concat([double_max_scores, rewards], axis=1)

  end_mask = tf.equal(docs_in_query-1,
                      tf.expand_dims(tf.range(serp_len), axis=0))
  reward_tile = tf.tile(rewards, [1, serp_len])
  q_values = tf.where(end_mask, reward_tile, q_values)

  unfiltered_dqn_loss = (scores - q_values)**2.

  doc_denom = tf.cast(tf.reduce_sum(tf.minimum(docs_in_query, serp_len)), tf.float32)
  mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)


  filtered_mc_loss = tf.where(mask,
                           unfiltered_mc_loss,
                           tf.zeros_like(unfiltered_mc_loss))
  mc_loss = tf.reduce_sum(filtered_mc_loss)/doc_denom

  filtered_dqn_loss = tf.where(mask,
                               unfiltered_dqn_loss,
                               tf.zeros_like(unfiltered_dqn_loss))

  dqn_loss = tf.reduce_sum(filtered_dqn_loss)/doc_denom

  tf.summary.scalar('monte_carlo/loss', mc_loss)
  tf.summary.scalar('DQN/loss', dqn_loss)

  filtered_double_max = tf.where(mask[:,:-1],
                                 double_max_scores,
                                 tf.zeros_like(double_max_scores))
  double_max_denom = doc_denom - tf.cast(n_docs, tf.float32)
  double_max_mean = tf.reduce_sum(filtered_double_max)/double_max_denom
  tf.summary.scalar('DQN/double_max_scores', double_max_mean)

  return mc_loss, dqn_loss
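
# A numpy sketch (illustration only) of the Q-target built in the loss above:
# each position's target is the next position's double-max score, except the
# slot where the query runs out of documents, which is overwritten with the
# episode reward via end_mask.
import numpy as np

serp_len = 4
rewards = np.array([[1.0], [0.5]])                    # per-query final rewards
double_max_scores = np.full((2, serp_len - 1), 0.3)   # stand-in next-step maxima
docs_in_query = np.array([[4], [2]])

q_values = np.concatenate([double_max_scores, rewards], axis=1)
end_mask = (docs_in_query - 1) == np.arange(serp_len)[None, :]
q_values = np.where(end_mask, np.tile(rewards, (1, serp_len)), q_values)
print(q_values)
# row 0 ends at position 3 -> [0.3 0.3 0.3 1. ]
# row 1 ends at position 1 -> [0.3 0.5 0.3 0.3]; the later slots are zeroed
# by the sequence mask before the loss is summed.
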
Example #6
def model(params, examples, labels, epsilon, stats_ops):
  serp_len = params['serp_len']
  doc_emb_size = params['doc_emb'][-1]
  hidden_state_size = params['hidden_state_size']
  docs = examples['doc_tensors']
  batch_size = docs.shape[0].value
  batch_max_docs = tf.shape(docs)[1]
  docs_per_query = examples['n_docs']

  result = {
    'docs_per_query': docs_per_query,
    }

  doc_emb = mu._shared_doc_embeddings(docs, params,
                                      '/main/doc_emb',
                                      inference=True)

  hidden_init = tf.zeros([batch_size, hidden_state_size])

  gru_fn = ru.get_gru_layer(params, '/main/gru',
                            label_network=False,
                            inference=True,
                            reuse_variable_scope=False)

  policy = PositionEpsilonGreedy(serp_len, epsilon, batch_size,
                                 batch_max_docs, docs_per_query)

  hidden_state = hidden_init
  serp = []
  serp_pos = []
  serp_labels = []
  serp_ind = []
  for i in range(serp_len):
    hidden_states = tf.tile(hidden_state[:, None, :], [1, batch_max_docs, 1])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    doc_scores = mu._create_subnetwork(score_input,
                                       params,
                                       subnetwork_name='/main/scoring/doc',
                                       label_network=False,
                                       reuse_variable_scope=i>0,
                                       inference=True,
                                       n_output=1)

    action_ind = policy.choose_doc(doc_scores)

    ind_nd = tf.stack([tf.range(batch_size, dtype=tf.int64), action_ind],
                      axis=1)

    select_doc = tf.gather_nd(docs, ind_nd)

    serp.append(select_doc) 
    serp_ind.append(action_ind)

    select_emb = tf.gather_nd(doc_emb, ind_nd)
    pos_input = tf.concat([hidden_state, select_emb], axis=1)
    pos_scores = mu._create_subnetwork(pos_input,
                                       params,
                                       subnetwork_name='/main/scoring/pos',
                                       label_network=False,
                                       reuse_variable_scope=i>0,
                                       inference=True,
                                       n_output=10)

    mean_summary(params, 'policy_%d/doc' % i,
                 tf.gather_nd(doc_scores, ind_nd), stats_ops)
    for j in range(serp_len):
      mean_summary(params, 'policy_%d/pos_%d' % (i, j),
                   pos_scores[:, j], stats_ops)

    action_pos = policy.choose_pos(pos_scores)

    in_doc = tf.less(i, docs_per_query[:, 0])
    serp_labels.append(tf.where(
      in_doc,
      tf.gather_nd(labels, ind_nd),
      tf.zeros([batch_size], dtype=tf.int32),
      ))
    serp_pos.append(tf.where(
      in_doc,
      action_pos,
      tf.fill([batch_size], tf.cast(serp_len, dtype=tf.int64)),
      ))

    if i < serp_len-1:
      a_pos = tf.cast(action_pos, tf.float32)[:, None]
      gru_input = tf.concat([select_emb, a_pos], axis=1)
      hidden_state = gru_fn(hidden_state, gru_input)

  pos_order = tf.stack(serp_pos, axis=1)
  _, order_ind = tf.nn.top_k(-pos_order, serp_len)
  unordered_labels = tf.stack(serp_labels, axis=1)
  batch_ind_nd = tf.tile(tf.range(batch_size)[:, None], [1, serp_len])
  order_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                           tf.reshape(order_ind, [-1])],
                           axis=1)
  ordered_labels = tf.gather_nd(unordered_labels, order_ind_nd)
  ordered_labels = tf.reshape(ordered_labels, [batch_size, serp_len])
  
  result['serp'] = tf.stack(serp, axis=1)
  result['serp_ind'] = tf.stack(serp_ind, axis=1)
  result['labels'] = ordered_labels
  result['select_order_labels'] = unordered_labels
  result['pos_order'] = pos_order

  max_docs = params['max_docs']
  padding = tf.convert_to_tensor([[0, 0], [0, max_docs-batch_max_docs], [0, 0]])
  padded_docs = tf.pad(docs, padding, "CONSTANT")
  padded_docs = tf.reshape(padded_docs, [batch_size, max_docs, docs.shape[2].value])
  result['docs'] = padded_docs
  return result
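
# A numpy sketch (illustration only) of the final padding step: the document
# tensor is zero-padded along the document axis so every replay entry has the
# fixed max_docs length expected by the loss side.
import numpy as np

batch_size, batch_max_docs, dim, max_docs = 2, 3, 4, 5
docs = np.ones((batch_size, batch_max_docs, dim))

pad = [(0, 0), (0, max_docs - batch_max_docs), (0, 0)]  # tf.pad widths
padded_docs = np.pad(docs, pad, mode='constant')
assert padded_docs.shape == (batch_size, max_docs, dim)
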
Example #7
def loss(params, replay, rewards, doc_rewards):
  serp_len = params['serp_len']
  visible_dropout = params['visible_dropout']
  docs_per_query = replay['docs_per_query']
  batch_docs = replay['serp']
  batch_pos = replay['pos_order']
  max_n_docs = params['max_docs']
  batch_size = batch_docs.shape[0]
  hidden_state_size = params['hidden_state_size']
  doc_level_rewards = params['doc_rewards']

  mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)

  init_hidden = tf.zeros([batch_size, hidden_state_size])

  drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

  doc_col = mu._shared_doc_embeddings(drop_col, params,
                                      '/main/doc_emb',
                                      inference=False,
                                      reuse_variable_scope=True)

  drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

  doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                      '/main/doc_emb',
                                      inference=False,
                                      reuse_variable_scope=True)

  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/main/gru',
                         label_network=False,
                         inference=False,
                         reuse_variable_scope=True)

  pos = tf.cast(batch_pos, tf.float32)[:, :, None]
  pos = tf.transpose(pos, [1, 0, 2])
  gru_input = tf.concat([serp_emb, pos], axis=2)
  hidden_states = tf.scan(gru, gru_input, init_hidden)
  hidden_states = tf.concat([init_hidden[None, :, :],
                            hidden_states[:-1, :, :]], axis=0)
  hidden_states = tf.transpose(hidden_states, [1, 0, 2])

  score_input = tf.concat([hidden_states, doc_emb], axis=2)
  doc_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/main/scoring/doc',
                                     label_network=False,
                                     reuse_variable_scope=True,
                                     inference=False)[:, :, 0]
  pos_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/main/scoring/pos',
                                     label_network=False,
                                     reuse_variable_scope=True,
                                     inference=False,
                                     n_output=serp_len)

  batch_pos_filtered = tf.where(mask,
                                batch_pos,
                                tf.zeros_like(batch_pos))
  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len])
  serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[:, None], [batch_size, 1])
  pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(serp_ind_nd, [-1]),
                         tf.reshape(batch_pos_filtered, [-1]),
                        ], axis=1)
  pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
  pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])

  if not doc_level_rewards:
    unfiltered_mc_loss = (rewards-pos_scores)**2 + (rewards-doc_scores)**2
  else:
    cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
    unfiltered_mc_loss = (cum_rewards-pos_scores)**2 + (cum_rewards-doc_scores)**2

  max_doc_ind, max_pos = max_train_doc_pos(params, hidden_states,
                                           doc_emb, serp_len, doc_col,
                                           replay['serp_ind'], batch_pos,
                                           docs_per_query, max_n_docs)

  label_doc_scores, q_pos_values = get_label_scores(params, replay, max_doc_ind, max_pos)

  if not doc_level_rewards:
    q_doc_values = tf.concat([label_doc_scores, rewards], axis=1)
    end_mask = tf.equal(docs_per_query-1,
                        tf.range(serp_len)[None, :])
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_doc_values = tf.where(end_mask, reward_tile, q_doc_values)
  else:
    zero_end = tf.zeros([batch_size, 1])
    q_doc_values = tf.concat([label_doc_scores, zero_end], axis=1)
    end_mask = tf.equal(docs_per_query-1,
                        tf.range(serp_len)[None, :])
    q_doc_values = tf.where(end_mask, tf.zeros_like(q_doc_values), q_doc_values)
    q_doc_values += doc_rewards


  unfiltered_doc_loss = (doc_scores - q_pos_values)**2
  unfiltered_pos_loss = (pos_scores - q_doc_values)**2
  unfiltered_dqn_loss = unfiltered_doc_loss + unfiltered_pos_loss

  query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
  query_denom = tf.minimum(query_denom, serp_len)
  query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))

  filtered_mc_loss = tf.where(mask,
                              unfiltered_mc_loss,
                              tf.zeros_like(unfiltered_mc_loss))
  mc_loss = tf.reduce_mean(tf.reduce_sum(filtered_mc_loss, axis=1)/query_denom)

  filtered_dqn_loss = tf.where(mask,
                               unfiltered_dqn_loss,
                               tf.zeros_like(unfiltered_dqn_loss))
  dqn_loss = tf.reduce_mean(tf.reduce_sum(filtered_dqn_loss, axis=1)/query_denom)

  tf.summary.scalar('monte_carlo/loss', mc_loss)

  tf.summary.scalar('DQN/loss', dqn_loss)

  tf.summary.scalar('DQN/max_doc_scores', tf.reduce_mean(label_doc_scores))
  tf.summary.scalar('DQN/max_pos_scores', tf.reduce_mean(q_pos_values))

  return mc_loss, dqn_loss
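
# A numpy sketch (illustration only) of the doc-level Monte Carlo target above:
# tf.cumsum(..., reverse=True) turns per-position rewards into reward-to-go
# values, so each position regresses on the reward collected from there onward.
import numpy as np

doc_rewards = np.array([[1.0, 0.0, 2.0, 0.5]])
cum_rewards = np.cumsum(doc_rewards[:, ::-1], axis=1)[:, ::-1]
print(cum_rewards)  # [[3.5 2.5 2.5 0.5]]
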
Example #8
def get_label_scores(params, replay, max_doc_ind, max_pos):
  serp_len = params['serp_len']
  all_docs = replay['docs']
  batch_docs = replay['serp']
  batch_pos = replay['pos_order']
  max_n_docs = params['max_docs']
  batch_size = all_docs.shape[0]
  hidden_state_size = params['hidden_state_size']

  init_hidden = tf.zeros([batch_size, hidden_state_size])

  doc_emb = mu._shared_doc_embeddings(batch_docs, params,
                                      '/label/doc_emb',
                                      label_network=True,
                                      inference=True,
                                      reuse_variable_scope=False)

  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len-1])
  doc_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(max_doc_ind, [-1]),
                        ], axis=1)
  max_docs = tf.gather_nd(all_docs, doc_ind_nd)
  max_docs = tf.reshape(max_docs, [batch_size, serp_len-1, all_docs.shape[2]])

  max_emb = mu._shared_doc_embeddings(max_docs, params,
                                      '/label/doc_emb',
                                      label_network=True,
                                      inference=True,
                                      reuse_variable_scope=True)

  serp_emb = tf.transpose(doc_emb, [1, 0, 2])
  gru = ru.get_gru_layer(params, '/label/gru',
                         label_network=True,
                         inference=True,
                         reuse_variable_scope=False)

  pos = tf.cast(batch_pos, tf.float32)[:, :-1, None]
  pos = tf.transpose(pos, [1, 0, 2])
  gru_input = tf.concat([serp_emb[:-1, :, :], pos], axis=2)
  hidden_states = tf.scan(gru, gru_input, init_hidden)
  hidden_states = tf.transpose(hidden_states, [1, 0, 2])

  score_input = tf.concat([hidden_states, max_emb], axis=2)
  doc_scores = mu._create_subnetwork(score_input,
                                     params,
                                     subnetwork_name='/label/scoring/doc',
                                     label_network=True,
                                     inference=True,
                                     reuse_variable_scope=False)[:,:,0]

  pos_states = tf.concat([init_hidden[:, None, :], hidden_states], axis=1)
  pos_input = tf.concat([pos_states, doc_emb], axis=2)
  pos_scores = mu._create_subnetwork(pos_input,
                                     params,
                                     subnetwork_name='/label/scoring/pos',
                                     label_network=True,
                                     inference=True,
                                     reuse_variable_scope=False,
                                     n_output=10)

  batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len])
  serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[None, :], [batch_size, 1])
  pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                         tf.reshape(serp_ind_nd, [-1]),
                         tf.reshape(max_pos, [-1]),
                        ],axis=1)
  pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
  pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])

  return doc_scores, pos_scores
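
# A numpy sketch (illustration only) of the flattened index-triple gather at
# the end of get_label_scores: for every (batch, step) pair, the chosen
# position picks a single logit from the position head. serp_len is used as
# the number of position outputs here for brevity (the code above uses 10).
import numpy as np

batch_size, serp_len = 2, 3
pos_scores = np.arange(batch_size * serp_len * serp_len,
                       dtype=float).reshape(batch_size, serp_len, serp_len)
max_pos = np.array([[0, 2, 1], [1, 1, 0]])           # chosen position per step

b = np.repeat(np.arange(batch_size), serp_len)       # batch_ind_nd, flattened
s = np.tile(np.arange(serp_len), batch_size)         # serp_ind_nd, flattened
picked = pos_scores[b, s, max_pos.reshape(-1)].reshape(batch_size, serp_len)
assert picked.shape == (batch_size, serp_len)
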
Example #9
def loss(params, replay, rewards, doc_rewards):
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_per_query = replay['docs_per_query']
    batch_docs = replay['serp']
    max_n_docs = params['max_docs']
    batch_size = params['replay_batch']
    hidden_state_size = params['hidden_state_size']
    doc_level_rewards = params['doc_rewards']

    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)

    doc_col = mu._shared_doc_embeddings(drop_col,
                                        params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)

    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        context_gru_fn = ru.get_gru_layer(params,
                                          '/main/gru/context',
                                          label_network=False,
                                          inference=False,
                                          reuse_variable_scope=True)
        scan_input = tf.transpose(doc_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)

        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]],
                           axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)

    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)

    doc_emb = mu._shared_doc_embeddings(drop_docs,
                                        params,
                                        '/main/doc_emb',
                                        label_network=False,
                                        inference=False,
                                        reuse_variable_scope=True)

    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params,
                           '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)

    hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
    hidden_states = tf.concat([init_hidden[None, :, :], hidden_states], axis=0)
    score_input = tf.concat([hidden_states, serp_emb], axis=2)

    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=True,
                                   inference=False)

    scores = tf.transpose(scores, [1, 0, 2])[:, :, 0]

    if not doc_level_rewards:
        unfiltered_mc_loss = (rewards - scores)**2.
    else:
        cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
        unfiltered_mc_loss = (cum_rewards - scores)**2.

    max_train_ind = max_train_docs(params, replay, hidden_states, doc_col)
    label_scores = get_label_scores(params, replay, max_train_ind)
    if not doc_level_rewards:
        q_values = tf.concat([label_scores, rewards], axis=1)

        end_mask = tf.equal(docs_per_query - 1, tf.range(serp_len)[None, :])
        reward_tile = tf.tile(rewards, [1, serp_len])
        q_values = tf.where(end_mask, reward_tile, q_values)

        unfiltered_dqn_loss = (scores - q_values)**2.
    else:
        zero_end = tf.zeros([batch_size, 1])
        q_values = tf.concat([label_scores, zero_end], axis=1)
        end_mask = tf.equal(docs_per_query - 1, tf.range(serp_len)[None, :])
        q_values = tf.where(end_mask, tf.zeros_like(q_values), q_values)
        q_values += doc_rewards

        unfiltered_dqn_loss = (scores - q_values)**2.

    mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)
    query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
    query_denom = tf.minimum(query_denom, serp_len)
    query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))

    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_mc_loss, axis=1) / query_denom)

    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_dqn_loss, axis=1) / query_denom)

    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)

    tf.summary.scalar('DQN/double_max_scores', tf.reduce_mean(label_scores))

    return mc_loss, dqn_loss
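
# A numpy sketch (illustration only) of the per-query normalisation above:
# each query's summed loss is divided by its own document count, clipped to
# [1, serp_len], and the result is averaged over the batch.
import numpy as np

serp_len = 4
docs_per_query = np.array([6, 2])
per_pos_loss = np.ones((2, serp_len))

mask = np.arange(serp_len)[None, :] < docs_per_query[:, None]
filtered = np.where(mask, per_pos_loss, 0.0)
query_denom = np.clip(docs_per_query.astype(float), 1.0, serp_len)
loss = np.mean(filtered.sum(axis=1) / query_denom)
print(loss)  # (4/4 + 2/2) / 2 = 1.0
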
Example #10
def get_label_scores(params, replay, max_train_ind):
    serp_len = params['serp_len']
    batch_size = replay['serp'].shape[0]
    hidden_state_size = params['hidden_state_size']
    docs_per_query = replay['docs_per_query']
    doc_col = replay['docs']
    batch_ind = tf.tile(
        tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len - 1])
    max_ind = tf.stack(
        [tf.reshape(batch_ind, [-1]),
         tf.reshape(max_train_ind, [-1])], axis=1)
    max_docs = tf.gather_nd(doc_col, max_ind)
    max_docs = tf.reshape(max_docs, [batch_size, serp_len - 1, -1])

    max_emb = mu._shared_doc_embeddings(max_docs,
                                        params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=False)

    doc_emb = mu._shared_doc_embeddings(replay['serp'][:, :-1],
                                        params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=True)

    gru = ru.get_gru_layer(params,
                           '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)

    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        emb_col = mu._shared_doc_embeddings(doc_col,
                                            params,
                                            '/label/doc_emb',
                                            inference=True,
                                            label_network=True,
                                            reuse_variable_scope=True)
        context_gru_fn = ru.get_gru_layer(params,
                                          '/label/gru/context',
                                          label_network=True,
                                          inference=True,
                                          reuse_variable_scope=False)
        scan_input = tf.transpose(emb_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)

        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]],
                           axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)

    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    hidden_states = tf.scan(gru, serp_emb, init_hidden)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])

    score_input = tf.concat([hidden_states, max_emb], axis=2)
    scores = mu._create_subnetwork(score_input,
                                   params,
                                   subnetwork_name='/label/scoring',
                                   label_network=True,
                                   reuse_variable_scope=False,
                                   inference=True)
    return tf.stop_gradient(scores)[:, :, 0]
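
# A numpy sketch (illustration only) of the double-Q idea behind this function:
# the '/main/...' network picks the argmax document (max_train_ind upstream),
# while the frozen '/label/...' network only evaluates that pick, mirroring the
# tf.stop_gradient on the returned scores.
import numpy as np

main_scores = np.array([[0.2, 0.9, 0.4]])    # online net, used for selection
label_scores = np.array([[0.5, 0.6, 0.8]])   # target net, used for evaluation

max_train_ind = main_scores.argmax(axis=1)             # select with main net
double_q = label_scores[np.arange(1), max_train_ind]   # evaluate with label net
print(double_q)                                        # [0.6], not 0.8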