def get_label_scores(params, replay):
    """Score every collection document against each SERP prefix using the
    frozen label (target) network.

    Used to build double-DQN targets: for each of the first ``serp_len - 1``
    placements, every document in the (padded) collection is scored given the
    GRU state summarizing the SERP built so far.

    Args:
        params: hyperparameter dict ('serp_len', 'max_docs',
            'hidden_state_size', ...).
        replay: replayed batch with 'docs' (padded collection), 'serp'
            (displayed documents) and 'pos_order' (chosen positions).

    Returns:
        Raw output of the '/label/scoring' subnetwork; presumably shaped
        (batch, serp_len - 1, max_docs, out) -- TODO confirm against
        mu._create_subnetwork.
    """
    serp_len = params['serp_len']
    all_docs = replay['docs']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    n_docs = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']
    init_hidden = tf.zeros([n_docs, hidden_state_size])
    # Embed the full padded collection with the label network (fresh scope).
    doc_col = mu._shared_doc_embeddings(all_docs, params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=False)
    # Embed the displayed documents, dropping the final slot: the state after
    # the last placement never feeds another target.
    doc_emb = mu._shared_doc_embeddings(batch_docs[:, :-1, :], params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=True)
    # tf.scan iterates over the leading axis, so switch to time-major.
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])
    gru = ru.get_gru_layer(params, '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)
    # Chosen display position is appended as an extra scalar GRU feature.
    pos = tf.cast(batch_pos, tf.float32)[:, :-1, None]
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    # Back to batch-major: (batch, serp_len - 1, hidden).
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])
    # Cross every prefix state with every candidate document.
    tiled_states = tf.tile(hidden_states[:, :, None, :],
                           [1, 1, max_n_docs, 1])
    tiled_docs = tf.tile(doc_col[:, None, :, :], [1, serp_len - 1, 1, 1])
    score_input = tf.concat([tiled_states, tiled_docs], axis=3)
    return mu._create_subnetwork(score_input, params,
                                 subnetwork_name='/label/scoring',
                                 label_network=True,
                                 inference=True,
                                 reuse_variable_scope=False)
def model(params, examples, labels, epsilon, stats_ops):
    """Build the inference graph for a single query: sequentially select
    `serp_len` (document, position) pairs with an epsilon-greedy policy.

    A GRU hidden state summarizes the SERP built so far; at each step the
    scoring subnetwork scores every remaining document/position and the
    policy picks an action. Returns a dict of tensors for the replay buffer.
    Operates on a single query (leading dims of 1), not a batch.
    """
    serp_len = params['serp_len']
    doc_emb_size = params['doc_emb'][-1]
    hidden_state_size = params['hidden_state_size']
    docs = mu._get_doc_tensors(examples, params, 'main')
    result = {}
    n_docs = tf.shape(docs)[0]
    result['docs_per_query'] = n_docs
    doc_emb = mu._shared_doc_embeddings(docs, params,
                                        '/main/doc_emb',
                                        inference=True)
    hidden_init = tf.zeros([1, hidden_state_size])
    gru_fn = ru.get_gru_layer(params, '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)
    policy = PositionEpsilonGreedy(serp_len, epsilon, n_docs)
    hidden_state = hidden_init  # (1, hidden); broadcast over docs below
    serp = []
    serp_pos = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        # Score every document given the current SERP summary state.
        hidden_states = tf.tile(hidden_state, [n_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=1)
        # Variables are created on the first iteration, reused afterwards.
        scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=i > 0,
                                       inference=True)
        for j in range(serp_len):
            mean_summary(params, 'policy_%d/pos_%d' % (i, j),
                         scores[:, j], stats_ops)
        # Policy jointly picks which document and at which position.
        action_ind, action_pos = policy.choose(scores)
        select_doc = tf.gather(docs, action_ind)
        serp.append(select_doc)
        serp_ind.append(action_ind)
        # Past the end of a short query: pad with label 0 / position serp_len.
        in_doc = tf.less(i, n_docs)
        serp_labels.append(
            tf.cond(
                in_doc,
                lambda: tf.gather(labels, action_ind, axis=0),
                lambda: tf.constant([[0]], dtype=tf.int64),
            ))
        serp_labels[-1].set_shape([1, 1])
        serp_pos.append(
            tf.cond(
                in_doc,
                lambda: tf.expand_dims(action_pos, axis=1),
                lambda: tf.constant([[serp_len]], dtype=tf.int32),
            ))
        serp_pos[-1].set_shape([1, 1])
        if i < serp_len - 1:
            # Advance the GRU with the chosen document and its position.
            a_pos = tf.expand_dims(tf.cast(action_pos, tf.float32), axis=1)
            a_doc = tf.gather(doc_emb, action_ind)
            gru_input = tf.concat([a_doc, a_pos], axis=1)
            hidden_state = gru_fn(hidden_state, gru_input)
    pos_order = tf.concat(serp_pos, axis=1)
    # Sort selection-order labels by display position (ascending).
    order_ind = tf.nn.top_k(-pos_order, serp_len)[1]
    unordered_labels = tf.squeeze(tf.concat(serp_labels, axis=1),
                                  axis=0)
    ordered_labels = tf.gather(unordered_labels, order_ind)
    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    # NOTE(review): 'serp_doc' duplicates 'serp_ind' exactly; possibly it was
    # meant to hold tf.stack(serp, ...) -- confirm against consumers.
    result['serp_doc'] = tf.stack(serp_ind, axis=1)
    result['labels'] = ordered_labels
    result['select_order_labels'] = unordered_labels[None, :]
    result['pos_order'] = pos_order
    # Pad the collection to a fixed max_docs so replayed batches stack.
    max_docs = params['max_docs']
    padding = tf.convert_to_tensor([[0, max_docs - n_docs], [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs,
                             [1, max_docs, docs.shape[1].value])
    result['docs'] = padded_docs
    return result
def loss(params, replay, rewards):
    """Monte-Carlo and double-DQN losses for the joint doc/pos model.

    Re-runs the training (non-inference, dropout-enabled) network over the
    replayed SERPs, reads off the Q-value of the action actually taken, and
    regresses it towards (a) the observed episode reward (MC) and (b) a
    double-DQN bootstrap target from the label network.

    Returns:
        (mc_loss, dqn_loss) scalar tensors; also emits summaries.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_in_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    n_docs = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']
    # Input (visible-layer) dropout on both the collection and the SERP docs.
    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)
    doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    init_hidden = tf.zeros([n_docs, hidden_state_size])
    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)
    doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    gru = ru.get_gru_layer(params, '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)
    pos = tf.expand_dims(tf.cast(batch_pos, tf.float32), axis=2)
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    # Score with the state *before* each placement (shift right by one).
    score_states = tf.concat(
        [init_hidden[None, :, :], hidden_states[:-1, :, :]], axis=0)
    score_input = tf.concat([score_states, serp_emb], axis=2)
    pos_scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=True,
                                       inference=False)
    pos_scores = tf.transpose(pos_scores, [1, 0, 2])
    # Select the score of the position that was actually chosen.
    pos_filter = tf.one_hot(batch_pos, serp_len)
    scores = tf.reduce_sum(pos_scores * pos_filter, axis=2)
    # Monte-Carlo target: the observed episode reward.
    unfiltered_mc_loss = (rewards - scores)**2
    # Double DQN: argmax by the training net, valued by the label net.
    max_filter = max_train_filter(params, hidden_states, serp_len, doc_col,
                                  replay['serp_ind'], batch_pos,
                                  docs_in_query, max_n_docs)
    label_scores = get_label_scores(params, replay)
    double_max_scores = tf.reduce_sum(max_filter * label_scores,
                                      axis=[2, 3])
    q_values = tf.concat([double_max_scores, rewards], axis=1)
    # At the final placement of a query the bootstrap is the reward itself.
    end_mask = tf.equal(docs_in_query - 1, tf.range(serp_len)[None, :])
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_values = tf.where(end_mask, reward_tile, q_values)
    unfiltered_dqn_loss = (scores - q_values)**2
    # Average over real (unpadded) documents only.
    doc_denom = tf.cast(tf.reduce_sum(docs_in_query), tf.float32)
    mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)
    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_sum(filtered_mc_loss) / doc_denom
    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_sum(filtered_dqn_loss) / doc_denom
    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)
    tf.summary.scalar('DQN/double_max_scores',
                      tf.reduce_mean(double_max_scores))
    return mc_loss, dqn_loss
def gru_model(params, examples, labels, epsilon):
    """Build the batched inference graph for the GRU ranker.

    Sequentially picks `serp_len` documents per query with an epsilon-greedy
    policy; a GRU state per query summarizes the documents placed so far.

    Args:
        params: hyperparameter dict ('serp_len', 'hidden_state_size',
            'max_docs', 'context_input', ...).
        examples: dict with 'doc_tensors' (batch, n_docs, feat) and 'n_docs'.
        labels: relevance labels aligned with 'doc_tensors'.
        epsilon: exploration rate for the epsilon-greedy policy.

    Returns:
        Dict with 'serp', 'serp_ind', 'labels', 'docs' (padded to max_docs)
        and 'docs_per_query', suitable for the replay buffer.

    Fix: removed a leftover debug `tf.Print` that logged serp_ind on every
    step (identity on values, but noisy logging in the serving graph), and
    the unused local `doc_emb_size`.
    """
    serp_len = params['serp_len']
    hidden_state_size = params['hidden_state_size']
    docs = examples['doc_tensors']
    batch_size = docs.shape[0].value
    batch_max_docs = tf.shape(docs)[1]
    docs_per_query = examples['n_docs']
    assert not params['context_input'], 'Context not supported for GRU.'
    result = {
        'docs_per_query': docs_per_query,
    }
    doc_emb = mu._shared_doc_embeddings(docs, params,
                                        '/main/doc_emb',
                                        inference=True)
    hidden_init = tf.zeros([batch_size, hidden_state_size])
    gru_fn = ru.get_gru_layer(params, '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)
    policy = mu.EpsilonGreedy(epsilon, batch_size, batch_max_docs,
                              docs_per_query)
    hidden_state = hidden_init
    serp = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        # Score every candidate given the per-query SERP summary state.
        hidden_states = tf.tile(hidden_state[:, None, :],
                                [1, batch_max_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=2)
        # Variables are created on the first iteration, reused afterwards.
        scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=i > 0,
                                       inference=True)
        tf.summary.scalar('policy/scores/pos_%d' % i,
                          tf.reduce_mean(scores))
        action = policy.choose(scores)
        serp_ind.append(action)
        nd_ind = tf.stack([tf.range(batch_size, dtype=tf.int64), action],
                          axis=1)
        select_doc = tf.gather_nd(docs, nd_ind)
        select_labels = tf.gather_nd(labels, nd_ind)[:, None]
        # Past the end of a short query the label is padded with 0.
        serp_labels.append(tf.where(
            tf.less(i, docs_per_query),
            select_labels,
            tf.zeros([batch_size, 1], dtype=tf.int32),
        ))
        serp.append(select_doc)
        if i < serp_len - 1:
            # Advance the GRU with the embedding of the chosen document.
            select_emb = tf.gather_nd(doc_emb, nd_ind)
            hidden_state = gru_fn(hidden_state, select_emb)
    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    result['labels'] = tf.concat(serp_labels, axis=1)
    tf.summary.histogram("label/output", result['labels'])
    # Pad the collection to a fixed max_docs so replayed batches stack.
    max_docs = params['max_docs']
    padding = tf.convert_to_tensor(
        [[0, 0], [0, max_docs - batch_max_docs], [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs,
                             [batch_size, max_docs, docs.shape[2].value])
    result['docs'] = padded_docs
    return result
def calculate_gru_loss(params, replay, rewards):
    """Monte-Carlo and double-DQN losses for the GRU ranker.

    Re-runs the training network (dropout enabled, variables reused) over the
    replayed SERPs and regresses each placement's score towards the episode
    reward (MC) and a double-DQN bootstrap target from the label network.

    Args:
        params: hyperparameter dict.
        replay: replayed batch with 'serp', 'serp_ind', 'docs',
            'docs_per_query'.
        rewards: (batch, 1) episode rewards.

    Returns:
        (mc_loss, dqn_loss) scalar tensors; also emits summaries.

    Fix: the original body used `score_input` and `i` without ever defining
    them (NameError at graph-construction time) and scored with
    inference=True. The scoring input is now built from the pre-placement
    hidden state and the SERP embedding, with reuse_variable_scope=True /
    inference=False, matching every other training-loss in this file. The
    unused `init_scores` local was dropped.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_in_query = replay['docs_per_query']
    batch_docs = replay['serp']
    max_n_docs = params['max_docs']
    n_docs = tf.shape(batch_docs)[0]
    hidden_state_size = params['hidden_state_size']
    # Input (visible-layer) dropout on both the collection and the SERP docs.
    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)
    doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    init_hidden = tf.zeros([n_docs, hidden_state_size])
    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)
    doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                        '/main/doc_emb',
                                        label_network=False,
                                        inference=False,
                                        reuse_variable_scope=True)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    gru = ru.get_gru_layer(params, '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)
    # States *before* each placement: prepend the initial state and drop the
    # state after the final placement.
    hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
    hidden_states = tf.concat([init_hidden[None, :, :], hidden_states],
                              axis=0)
    score_input = tf.concat([hidden_states, serp_emb], axis=2)
    scores = mu._create_subnetwork(score_input, params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=True,
                                   inference=False)
    scores = tf.squeeze(tf.transpose(scores, [1, 0, 2]), axis=2)
    unfiltered_mc_loss = (rewards - scores)**2.
    # NOTE(review): other variants in this file pass `params` as the first
    # argument of max_train_filter -- confirm this overload takes the gru.
    max_filter = max_train_filter(gru, hidden_states, serp_len, doc_col,
                                  replay['serp_ind'], docs_in_query,
                                  max_n_docs)
    label_scores = get_label_scores(params, replay)
    double_max_scores = tf.reduce_sum(max_filter * label_scores, axis=2)
    q_values = tf.concat([double_max_scores, rewards], axis=1)
    # At the final placement of a query the bootstrap is the reward itself.
    end_mask = tf.equal(docs_in_query - 1,
                        tf.expand_dims(tf.range(serp_len), axis=0))
    reward_tile = tf.tile(rewards, [1, serp_len])
    q_values = tf.where(end_mask, reward_tile, q_values)
    unfiltered_dqn_loss = (scores - q_values)**2.
    # Average over real (unpadded) placements only.
    doc_denom = tf.cast(tf.reduce_sum(tf.minimum(docs_in_query, serp_len)),
                        tf.float32)
    mask = tf.squeeze(tf.sequence_mask(docs_in_query, serp_len), axis=1)
    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_sum(filtered_mc_loss) / doc_denom
    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_sum(filtered_dqn_loss) / doc_denom
    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)
    filtered_double_max = tf.where(mask[:, :-1], double_max_scores,
                                   tf.zeros_like(double_max_scores))
    double_max_denom = doc_denom - tf.cast(n_docs, tf.float32)
    double_max_mean = tf.reduce_sum(filtered_double_max) / double_max_denom
    tf.summary.scalar('DQN/double_max_scores', double_max_mean)
    return mc_loss, dqn_loss
def model(params, examples, labels, epsilon, stats_ops):
    """Build the batched inference graph for the hierarchical doc+pos model.

    At each of the `serp_len` steps a '/main/scoring/doc' network picks which
    document to add, then a '/main/scoring/pos' network picks where to place
    it; a GRU per query summarizes the SERP built so far. Returns a dict of
    tensors for the replay buffer.
    """
    serp_len = params['serp_len']
    doc_emb_size = params['doc_emb'][-1]
    hidden_state_size = params['hidden_state_size']
    docs = examples['doc_tensors']
    batch_size = docs.shape[0].value
    batch_max_docs = tf.shape(docs)[1]
    docs_per_query = examples['n_docs']
    result = {
        'docs_per_query': docs_per_query,
    }
    doc_emb = mu._shared_doc_embeddings(docs, params,
                                        '/main/doc_emb',
                                        inference=True)
    hidden_init = tf.zeros([batch_size, hidden_state_size])
    gru_fn = ru.get_gru_layer(params, '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)
    policy = PositionEpsilonGreedy(serp_len, epsilon, batch_size,
                                   batch_max_docs, docs_per_query)
    hidden_state = hidden_init
    serp = []
    serp_pos = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        # Stage 1: score every candidate document given the SERP state.
        hidden_states = tf.tile(hidden_state[:, None, :],
                                [1, batch_max_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=2)
        doc_scores = mu._create_subnetwork(score_input, params,
                                           subnetwork_name='/main/scoring/doc',
                                           label_network=False,
                                           reuse_variable_scope=i > 0,
                                           inference=True,
                                           n_output=1)
        action_ind = policy.choose_doc(doc_scores)
        ind_nd = tf.stack([tf.range(batch_size, dtype=tf.int64), action_ind],
                          axis=1)
        select_doc = tf.gather_nd(docs, ind_nd)
        serp.append(select_doc)
        serp_ind.append(action_ind)
        # Stage 2: score positions for the chosen document.
        select_emb = tf.gather_nd(doc_emb, ind_nd)
        pos_input = tf.concat([hidden_state, select_emb], axis=1)
        # NOTE(review): n_output=10 is hard-coded; presumably serp_len == 10
        # in all configs -- confirm.
        pos_scores = mu._create_subnetwork(pos_input, params,
                                           subnetwork_name='/main/scoring/pos',
                                           label_network=False,
                                           reuse_variable_scope=i > 0,
                                           inference=True,
                                           n_output=10)
        mean_summary(params, 'policy_%d/doc' % i,
                     tf.gather_nd(doc_scores, ind_nd), stats_ops)
        for j in range(serp_len):
            mean_summary(params, 'policy_%d/pos_%d' % (i, j),
                         pos_scores[:, j], stats_ops)
        action_pos = policy.choose_pos(pos_scores)
        # Past the end of a short query: pad label 0 / position serp_len.
        in_doc = tf.less(i, docs_per_query[:, 0])
        serp_labels.append(tf.where(
            in_doc,
            tf.gather_nd(labels, ind_nd),
            tf.zeros([batch_size], dtype=tf.int32),
        ))
        serp_pos.append(tf.where(
            in_doc,
            action_pos,
            tf.fill([batch_size], tf.cast(serp_len, dtype=tf.int64)),
        ))
        if i < serp_len-1:
            # Advance the GRU with the chosen document and its position.
            a_pos = tf.cast(action_pos, tf.float32)[:, None]
            gru_input = tf.concat([select_emb, a_pos], axis=1)
            hidden_state = gru_fn(hidden_state, gru_input)
    pos_order = tf.stack(serp_pos, axis=1)
    # Sort selection-order labels by display position (ascending).
    _, order_ind = tf.nn.top_k(-pos_order, serp_len)
    unordered_labels = tf.stack(serp_labels, axis=1)
    batch_ind_nd = tf.tile(tf.range(batch_size)[:, None], [1, serp_len])
    order_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                             tf.reshape(order_ind, [-1])], axis=1)
    ordered_labels = tf.gather_nd(unordered_labels, order_ind_nd)
    ordered_labels = tf.reshape(ordered_labels, [batch_size, serp_len])
    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    result['labels'] = ordered_labels
    result['select_order_labels'] = unordered_labels
    result['pos_order'] = pos_order
    # Pad the collection to a fixed max_docs so replayed batches stack.
    max_docs = params['max_docs']
    padding = tf.convert_to_tensor([[0, 0],
                                    [0, max_docs-batch_max_docs],
                                    [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs,
                             [batch_size, max_docs, docs.shape[2].value])
    result['docs'] = padded_docs
    return result
def loss(params, replay, rewards, doc_rewards):
    """Monte-Carlo and double-DQN losses for the hierarchical doc+pos model.

    Re-runs the training (dropout-enabled) networks over the replayed SERPs
    and regresses both the doc-scores and the pos-scores towards MC and
    bootstrapped targets. Supports either a single episode reward
    (`rewards`) or per-document rewards (`doc_rewards`), selected by
    params['doc_rewards'].

    Returns:
        (mc_loss, dqn_loss) scalar tensors; also emits summaries.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_per_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    batch_size = batch_docs.shape[0]
    hidden_state_size = params['hidden_state_size']
    doc_level_rewards = params['doc_rewards']
    mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)
    init_hidden = tf.zeros([batch_size, hidden_state_size])
    # Input (visible-layer) dropout on both the collection and the SERP docs.
    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)
    doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)
    doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    gru = ru.get_gru_layer(params, '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)
    pos = tf.cast(batch_pos, tf.float32)[:, :, None]
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb, pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    # States *before* each placement: prepend init, drop the final state.
    hidden_states = tf.concat([init_hidden[None, :, :],
                               hidden_states[:-1, :, :]], axis=0)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])
    score_input = tf.concat([hidden_states, doc_emb], axis=2)
    doc_scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring/doc',
                                       label_network=False,
                                       reuse_variable_scope=True,
                                       inference=False)[:, :, 0]
    pos_scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring/pos',
                                       label_network=False,
                                       reuse_variable_scope=True,
                                       inference=False,
                                       n_output=serp_len)
    # Gather the pos-score of the position actually chosen; padded steps use
    # position 0 (they are masked out of the loss below anyway).
    batch_pos_filtered = tf.where(mask, batch_pos, tf.zeros_like(batch_pos))
    batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None],
                           [1, serp_len])
    serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[:, None],
                          [batch_size, 1])
    pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                           tf.reshape(serp_ind_nd, [-1]),
                           tf.reshape(batch_pos_filtered, [-1]),
                           ], axis=1)
    pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
    pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])
    # Monte-Carlo target: episode reward, or reward-to-go for doc rewards.
    if not doc_level_rewards:
        unfiltered_mc_loss = (rewards-pos_scores)**2 + (rewards-doc_scores)**2
    else:
        cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
        unfiltered_mc_loss = (cum_rewards-pos_scores)**2 + (cum_rewards-doc_scores)**2
    # Double DQN: greedy (doc, pos) by the training net, valued by the
    # label net.
    max_doc_ind, max_pos = max_train_doc_pos(params, hidden_states, doc_emb,
                                             serp_len, doc_col,
                                             replay['serp_ind'], batch_pos,
                                             docs_per_query, max_n_docs)
    label_doc_scores, q_pos_values = get_label_scores(params, replay,
                                                      max_doc_ind, max_pos)
    if not doc_level_rewards:
        # Pos-level target: next step's doc value, reward at the episode end.
        q_doc_values = tf.concat([label_doc_scores, rewards], axis=1)
        end_mask = tf.equal(docs_per_query-1, tf.range(serp_len)[None, :])
        reward_tile = tf.tile(rewards, [1, serp_len])
        q_doc_values = tf.where(end_mask, reward_tile, q_doc_values)
    else:
        # With per-document rewards the terminal bootstrap is zero and the
        # immediate doc reward is added on top.
        zero_end = tf.zeros([batch_size, 1])
        q_doc_values = tf.concat([label_doc_scores, zero_end], axis=1)
        end_mask = tf.equal(docs_per_query-1, tf.range(serp_len)[None, :])
        q_doc_values = tf.where(end_mask, tf.zeros_like(q_doc_values),
                                q_doc_values)
        q_doc_values += doc_rewards
    # NOTE(review): the cross pairing is intentional for the hierarchy --
    # the doc choice bootstraps from the same-step pos value, and the pos
    # choice bootstraps from the next-step doc value -- confirm.
    unfiltered_doc_loss = (doc_scores - q_pos_values)**2
    unfiltered_pos_loss = (pos_scores - q_doc_values)**2
    unfiltered_dqn_loss = unfiltered_doc_loss + unfiltered_pos_loss
    # Normalize per query by its (clipped, at least 1) document count.
    query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
    query_denom = tf.minimum(query_denom, serp_len)
    query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))
    filtered_mc_loss = tf.where(mask,
                                unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_mean(tf.reduce_sum(filtered_mc_loss,
                                           axis=1)/query_denom)
    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_mean(tf.reduce_sum(filtered_dqn_loss,
                                            axis=1)/query_denom)
    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)
    tf.summary.scalar('DQN/max_doc_scores',
                      tf.reduce_mean(label_doc_scores))
    tf.summary.scalar('DQN/max_pos_scores', tf.reduce_mean(q_pos_values))
    return mc_loss, dqn_loss
def get_label_scores(params, replay, max_doc_ind, max_pos):
    """Value the greedy (doc, pos) actions with the label (target) network.

    Rolls the label GRU over the replayed SERP and scores (a) the documents
    `max_doc_ind` selected by the training network and (b) the positions
    `max_pos`, producing the two double-DQN target streams.

    Returns:
        (doc_scores, pos_scores): doc_scores has serp_len - 1 steps (no
        next-doc value after the final placement); pos_scores has serp_len.
    """
    serp_len = params['serp_len']
    all_docs = replay['docs']
    batch_docs = replay['serp']
    batch_pos = replay['pos_order']
    max_n_docs = params['max_docs']
    batch_size = all_docs.shape[0]
    hidden_state_size = params['hidden_state_size']
    init_hidden = tf.zeros([batch_size, hidden_state_size])
    doc_emb = mu._shared_doc_embeddings(batch_docs, params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=False)
    # Look up (and embed) the greedily-selected documents from the padded
    # collection.
    batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None],
                           [1, serp_len-1])
    doc_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                           tf.reshape(max_doc_ind, [-1]),
                           ], axis=1)
    max_docs = tf.gather_nd(all_docs, doc_ind_nd)
    max_docs = tf.reshape(max_docs,
                          [batch_size, serp_len-1, all_docs.shape[2]])
    max_emb = mu._shared_doc_embeddings(max_docs, params,
                                        '/label/doc_emb',
                                        label_network=True,
                                        inference=True,
                                        reuse_variable_scope=True)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    gru = ru.get_gru_layer(params, '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)
    # Roll the label GRU over the first serp_len - 1 placements.
    pos = tf.cast(batch_pos, tf.float32)[:, :-1, None]
    pos = tf.transpose(pos, [1, 0, 2])
    gru_input = tf.concat([serp_emb[:-1, :, :], pos], axis=2)
    hidden_states = tf.scan(gru, gru_input, init_hidden)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])
    # Doc values: state after placement i paired with the greedy doc i+1.
    # NOTE(review): unlike the context variant below, no tf.stop_gradient is
    # applied here -- presumably the label variables are never trained;
    # confirm.
    score_input = tf.concat([hidden_states, max_emb], axis=2)
    doc_scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/label/scoring/doc',
                                       label_network=True,
                                       inference=True,
                                       reuse_variable_scope=False)[:,:,0]
    # Pos values: pre-placement state paired with the displayed doc, then the
    # score of the greedy position max_pos is gathered out.
    pos_states = tf.concat([init_hidden[:, None, :], hidden_states], axis=1)
    pos_input = tf.concat([pos_states, doc_emb], axis=2)
    # NOTE(review): n_output=10 is hard-coded; presumably serp_len == 10 in
    # all configs -- confirm.
    pos_scores = mu._create_subnetwork(pos_input, params,
                                       subnetwork_name='/label/scoring/pos',
                                       label_network=True,
                                       inference=True,
                                       reuse_variable_scope=False,
                                       n_output=10)
    batch_ind_nd = tf.tile(tf.range(batch_size, dtype=tf.int64)[:, None],
                           [1, serp_len])
    serp_ind_nd = tf.tile(tf.range(serp_len, dtype=tf.int64)[None, :],
                          [batch_size, 1])
    pos_ind_nd = tf.stack([tf.reshape(batch_ind_nd, [-1]),
                           tf.reshape(serp_ind_nd, [-1]),
                           tf.reshape(max_pos, [-1]),
                           ],axis=1)
    pos_scores = tf.gather_nd(pos_scores, pos_ind_nd)
    pos_scores = tf.reshape(pos_scores, [batch_size, serp_len])
    return doc_scores, pos_scores
def loss(params, replay, rewards, doc_rewards):
    """Monte-Carlo and double-DQN losses for the GRU ranker (context-aware
    variant).

    Re-runs the training network (dropout enabled, variables reused) over the
    replayed SERPs; optionally initializes the GRU state from a context GRU
    over the whole collection. Supports a single episode reward (`rewards`)
    or per-document rewards (`doc_rewards`), selected by
    params['doc_rewards'].

    Returns:
        (mc_loss, dqn_loss) scalar tensors; also emits summaries.

    Fix: in the doc-level-rewards branch the Monte-Carlo loss computed the
    reward-to-go `cum_rewards` but then regressed against the episode-level
    `rewards` anyway; it now uses `cum_rewards`, matching the other
    doc-level-rewards loss in this file.
    """
    serp_len = params['serp_len']
    visible_dropout = params['visible_dropout']
    docs_per_query = replay['docs_per_query']
    batch_docs = replay['serp']
    batch_size = params['replay_batch']
    hidden_state_size = params['hidden_state_size']
    doc_level_rewards = params['doc_rewards']
    # Input (visible-layer) dropout on both the collection and the SERP docs.
    drop_col = tf.nn.dropout(replay['docs'], visible_dropout)
    doc_col = mu._shared_doc_embeddings(drop_col, params,
                                        '/main/doc_emb',
                                        inference=False,
                                        reuse_variable_scope=True)
    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        # Initialize the SERP GRU from a context GRU run over the collection;
        # the state at index docs_per_query - 1 summarizes the real docs.
        context_gru_fn = ru.get_gru_layer(params, '/main/gru/context',
                                          label_network=False,
                                          inference=False,
                                          reuse_variable_scope=True)
        scan_input = tf.transpose(doc_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)
        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]], axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)
    drop_docs = tf.nn.dropout(batch_docs, visible_dropout)
    doc_emb = mu._shared_doc_embeddings(drop_docs, params,
                                        '/main/doc_emb',
                                        label_network=False,
                                        inference=False,
                                        reuse_variable_scope=True)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    gru = ru.get_gru_layer(params, '/main/gru',
                           label_network=False,
                           inference=False,
                           reuse_variable_scope=True)
    # States *before* each placement: prepend init, drop the final state.
    hidden_states = tf.scan(gru, serp_emb[:-1, :, :], init_hidden)
    hidden_states = tf.concat([init_hidden[None, :, :], hidden_states],
                              axis=0)
    score_input = tf.concat([hidden_states, serp_emb], axis=2)
    scores = mu._create_subnetwork(score_input, params,
                                   subnetwork_name='/main/scoring',
                                   label_network=False,
                                   reuse_variable_scope=True,
                                   inference=False)
    scores = tf.transpose(scores, [1, 0, 2])[:, :, 0]
    # Monte-Carlo target: episode reward, or reward-to-go for doc rewards.
    if not doc_level_rewards:
        unfiltered_mc_loss = (rewards - scores)**2.
    else:
        cum_rewards = tf.cumsum(doc_rewards, axis=1, reverse=True)
        unfiltered_mc_loss = (cum_rewards - scores)**2.
    # Double DQN: argmax docs by the training net, valued by the label net.
    max_train_ind = max_train_docs(params, replay, hidden_states, doc_col)
    label_scores = get_label_scores(params, replay, max_train_ind)
    if not doc_level_rewards:
        # Bootstrap with the next-step value; reward at the episode end.
        q_values = tf.concat([label_scores, rewards], axis=1)
        end_mask = tf.equal(docs_per_query - 1,
                            tf.range(serp_len)[None, :])
        reward_tile = tf.tile(rewards, [1, serp_len])
        q_values = tf.where(end_mask, reward_tile, q_values)
        unfiltered_dqn_loss = (scores - q_values)**2.
    else:
        # Zero terminal bootstrap plus the immediate per-document reward.
        zero_end = tf.zeros([batch_size, 1])
        q_values = tf.concat([label_scores, zero_end], axis=1)
        end_mask = tf.equal(docs_per_query - 1,
                            tf.range(serp_len)[None, :])
        q_values = tf.where(end_mask, tf.zeros_like(q_values), q_values)
        q_values += doc_rewards
        unfiltered_dqn_loss = (scores - q_values)**2.
    # Normalize per query by its (clipped, at least 1) document count.
    mask = tf.squeeze(tf.sequence_mask(docs_per_query, serp_len), axis=1)
    query_denom = tf.cast(docs_per_query[:, 0], tf.float32)
    query_denom = tf.minimum(query_denom, serp_len)
    query_denom = tf.maximum(query_denom, tf.ones_like(query_denom))
    filtered_mc_loss = tf.where(mask, unfiltered_mc_loss,
                                tf.zeros_like(unfiltered_mc_loss))
    mc_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_mc_loss, axis=1) / query_denom)
    filtered_dqn_loss = tf.where(mask, unfiltered_dqn_loss,
                                 tf.zeros_like(unfiltered_dqn_loss))
    dqn_loss = tf.reduce_mean(
        tf.reduce_sum(filtered_dqn_loss, axis=1) / query_denom)
    tf.summary.scalar('monte_carlo/loss', mc_loss)
    tf.summary.scalar('DQN/loss', dqn_loss)
    tf.summary.scalar('DQN/double_max_scores', tf.reduce_mean(label_scores))
    return mc_loss, dqn_loss
def get_label_scores(params, replay, max_train_ind):
    """Value the training network's greedy document choices with the label
    (target) network, for double-DQN targets (context-aware GRU variant).

    Args:
        params: hyperparameter dict.
        replay: replayed batch ('serp', 'docs', 'docs_per_query').
        max_train_ind: (batch, serp_len - 1) indices into replay['docs'] of
            the documents the training network would pick greedily.

    Returns:
        (batch, serp_len - 1) label-network scores with gradients stopped.
    """
    serp_len = params['serp_len']
    batch_size = replay['serp'].shape[0]
    hidden_state_size = params['hidden_state_size']
    docs_per_query = replay['docs_per_query']
    doc_col = replay['docs']
    # Look up the greedily-selected documents from the padded collection.
    batch_ind = tf.tile(
        tf.range(batch_size, dtype=tf.int64)[:, None], [1, serp_len - 1])
    max_ind = tf.stack(
        [tf.reshape(batch_ind, [-1]),
         tf.reshape(max_train_ind, [-1])], axis=1)
    max_docs = tf.gather_nd(doc_col, max_ind)
    max_docs = tf.reshape(max_docs, [batch_size, serp_len - 1, -1])
    max_emb = mu._shared_doc_embeddings(max_docs, params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=False)
    # Embed the displayed SERP docs (last slot dropped: no next-step value
    # after the final placement).
    doc_emb = mu._shared_doc_embeddings(replay['serp'][:, :-1], params,
                                        '/label/doc_emb',
                                        inference=True,
                                        label_network=True,
                                        reuse_variable_scope=True)
    gru = ru.get_gru_layer(params, '/label/gru',
                           label_network=True,
                           inference=True,
                           reuse_variable_scope=False)
    init_hidden = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        # Initialize the label GRU from a context GRU over the collection;
        # the state at index docs_per_query - 1 summarizes the real docs.
        emb_col = mu._shared_doc_embeddings(doc_col, params,
                                            '/label/doc_emb',
                                            inference=True,
                                            label_network=True,
                                            reuse_variable_scope=True)
        context_gru_fn = ru.get_gru_layer(params, '/label/gru/context',
                                          label_network=True,
                                          inference=True,
                                          reuse_variable_scope=False)
        scan_input = tf.transpose(emb_col, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, init_hidden)
        ind_nd = tf.concat([docs_per_query - 1,
                            tf.range(batch_size)[:, None]], axis=1)
        init_hidden = tf.gather_nd(context, ind_nd)
    serp_emb = tf.transpose(doc_emb, [1, 0, 2])  # time-major for tf.scan
    hidden_states = tf.scan(gru, serp_emb, init_hidden)
    hidden_states = tf.transpose(hidden_states, [1, 0, 2])
    # Pair the state after placement i with greedy doc i+1 and score it.
    score_input = tf.concat([hidden_states, max_emb], axis=2)
    scores = mu._create_subnetwork(score_input, params,
                                   subnetwork_name='/label/scoring',
                                   label_network=True,
                                   reuse_variable_scope=False,
                                   inference=True)
    # Targets must not propagate gradients into the label network.
    return tf.stop_gradient(scores)[:, :, 0]