def _ensemble_predict(self, obs):
    """Pick one action by ensemble voting.

    Every actor proposes an action for the same observation; every
    critic scores every proposal; the proposal with the highest mean
    normalized score wins.
    """
    # One candidate action per actor, stacked along the batch axis.
    candidate_actions = layers.concat(
        [self.actors[i].predict(obs) for i in range(self.ensemble_num)],
        axis=0)
    # Tile the observation so each critic can score every candidate.
    tiled_obs = layers.expand(obs, expand_times=[self.ensemble_num, 1])

    per_critic_scores = []
    for i in range(self.ensemble_num):
        score = self.critics[i].predict(tiled_obs, candidate_actions)
        per_critic_scores.append(layers.unsqueeze(score, axes=[1]))
    # Rows index candidate actions, columns index critics.
    score_matrix = layers.concat(per_critic_scores, axis=1)

    # Normalize each critic's column so critics with different score
    # scales contribute equally to the vote.
    column_total = layers.reduce_sum(score_matrix, dim=0, keep_dim=True)
    column_total = layers.expand(
        column_total, expand_times=[self.ensemble_num, 1])
    normalized_scores = score_matrix / column_total

    # Average over critics, then gather the best-scoring candidate.
    mean_scores = layers.reduce_mean(normalized_scores, dim=1, keep_dim=True)
    winner_idx = layers.cast(
        layers.argmax(mean_scores, axis=0), dtype='int32')
    best_action = layers.gather(candidate_actions, winner_idx)
    return layers.squeeze(best_action, axes=[0])
def value(self, obs, act):
    """Q(s, a): fuse an observation-only tower, an action-only tower,
    and a joint tower, then regress a scalar Q value per batch row.
    """
    joint_input = layers.concat([obs, act], axis=1)

    # Observation-only tower.
    obs_feat = self.obs_fc3(self.obs_fc2(self.obs_fc1(obs)))
    # Action-only tower.
    act_feat = self.act_fc3(self.act_fc2(self.act_fc1(act)))
    # Joint (obs ++ act) tower.
    joint_feat = self.total_fc3(self.total_fc2(self.total_fc1(joint_input)))

    # Merge the three towers and regress Q with a 4-layer head.
    merged = layers.concat([obs_feat, act_feat, joint_feat], axis=1)
    q = self.re_fc4(self.re_fc3(self.re_fc2(self.re_fc1(merged))))
    return layers.squeeze(q, axes=[1])
def value(self, obs, act):
    """Return two independent Q estimates (Q1, Q2) for the same
    (obs, act) pair — a twin-critic head (e.g. for clipped double-Q).
    """
    # First head: embed obs, join action, two more layers.
    feat1 = self.fc1(obs)
    q1 = self.fc3(self.fc2(layers.concat([feat1, act], axis=1)))
    q1 = layers.squeeze(q1, axes=[1])

    # Second head: same shape, separate parameters.
    feat2 = self.fc4(obs)
    q2 = self.fc6(self.fc5(layers.concat([feat2, act], axis=1)))
    q2 = layers.squeeze(q2, axes=[1])
    return q1, q2
def value(self, obs, act):
    """Twin-critic forward pass: two independent scalar Q estimates
    over the same (obs, act) input.
    """
    # Head 1: obs -> fc1, concat action, fc2 -> fc3.
    obs_feat_a = self.fc1(obs)
    q1 = self.fc3(self.fc2(layers.concat([obs_feat_a, act], axis=1)))
    q1 = layers.squeeze(q1, axes=[1])

    # Head 2 mirrors head 1 with its own parameters (fc4..fc6).
    obs_feat_b = self.fc4(obs)
    q2 = self.fc6(self.fc5(layers.concat([obs_feat_b, act], axis=1)))
    q2 = layers.squeeze(q2, axes=[1])
    return q1, q2
def value(self, obs, act):
    """Given state and action, return the corresponding scalar Q(s, a)."""
    state_action = layers.concat([obs, act], axis=1)
    q = self.fc2(self.fc1(state_action))
    # Drop the trailing singleton dim so the result is one value per row.
    return layers.squeeze(q, axes=[1])
def value(self, obs_n, act_n):
    """Centralized critic: score the joint observations/actions of all
    agents and return one scalar Q per batch row.
    """
    # obs_n and act_n are lists (one tensor per agent); concat them all.
    joint_input = layers.concat(obs_n + act_n, axis=1)
    hidden = self.fc2(self.fc1(joint_input))
    return layers.squeeze(self.fc3(hidden), axes=[1])
def value(self, obs, act):
    """Return two Q estimates from separate sub-networks that share the
    same concatenated (obs, act) input.
    """
    state_action = layers.concat([obs, act], axis=1)
    q1 = layers.squeeze(self.q1(state_action), axes=[1])
    q2 = layers.squeeze(self.q2(state_action), axes=[1])
    return q1, q2
def forward(self, inputs, mode):
    """Predict per-item click probabilities with a transformer decoder.

    NOTE(review, from original): avoid `sequence_expand` here — it was
    flagged as unsafe for backward; `sequence_expand_as` is used instead.
    """
    is_test = mode in ['test', 'inference']

    # User-side features.
    user_embedding = self._build_embeddings(inputs, self.user_slot_names)
    user_feature = self.user_feature_fc_op(user_embedding)

    # Item embedding plus positional embedding.
    item_embedding = self._build_embeddings(inputs, self.item_slot_names)
    item_fc = self.item_fc_op(item_embedding)
    pos = fluid_sequence_get_pos(item_fc)
    pos_embed = self.dict_data_embed_op['pos'](pos)

    # Broadcast the user feature over every item of the sequence.
    input_embed = layers.concat([
        item_fc,
        pos_embed,
        layers.sequence_expand_as(user_feature, item_fc),
    ], 1)

    # Transformer decoding followed by a two-layer output head.
    trans_in = self.input_embed_fc_op(input_embed)
    decoding = self.transformer_decode(is_test, trans_in)
    return self.output_fc2_op(self.output_fc1_op(decoding))
def value(self, obs, act):
    """Scalar Q(s, a): embed the observation first, then merge the
    action in before the final layers.
    """
    obs_feat = self.fc1(obs)
    merged = layers.concat([obs_feat, act], axis=1)
    q = self.fc3(self.fc2(merged))
    return layers.squeeze(q, axes=[1])
def sampling(self, inputs, sampling_type):
    """Sample an item id sequence with the sampling RNN.

    `sampling_type` selects the exploration scheme: 'eps_greedy' reads
    inputs['eps'], 'softmax' reads inputs['eta'].
    """
    decode_len = inputs['decode_len']

    # Encode the user and the candidate items.
    user_feature = self.user_encode(inputs)
    item_embedding = self._build_embeddings(inputs, self.item_slot_names)
    item_fc = self.item_fc_op(item_embedding)
    pos = fluid_sequence_get_pos(item_fc)
    pos_embed = self.dict_data_embed_op['pos'](pos)

    # Optionally fold a candidate-set summary into the initial hidden.
    if self._candidate_encode:
        cand_encoding = self.candidate_encode(item_fc)
        init_hidden = self.candidate_encode_fc_op(
            layers.concat([user_feature, cand_encoding], 1))
    else:
        init_hidden = user_feature

    eps = inputs['eps'] if sampling_type == 'eps_greedy' else None
    eta = inputs['eta'] if sampling_type == 'softmax' else None
    sampled_id = self.sampling_rnn(
        item_fc,
        h_0=init_hidden,
        pos_embed=pos_embed,
        forward_func=self.sampling_rnn_forward,
        sampling_type=sampling_type,
        eps=eps,
        eta=eta)
    # Restore the sequence LoD and truncate to the requested length.
    return self._cut_by_decode_len(
        layers.lod_reset(sampled_id, item_fc), decode_len)
def value(self, obs, act):
    """Return Q(s, a) from a 3-layer MLP over the concatenated input."""
    state_action = layers.concat([obs, act], axis=1)
    hidden = self.fc2(self.fc1(state_action))
    return layers.squeeze(self.fc3(hidden), axes=[1])
def Q1(self, obs, act):
    """Evaluate only the first critic head and return its scalar Q."""
    obs_feat = self.fc1(obs)
    q1 = self.fc3(self.fc2(layers.concat([obs_feat, act], axis=1)))
    return layers.squeeze(q1, axes=[1])
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """One GRU step: fuse item and position features, advance the
    hidden state, and return (next_hidden, hidden_fc_output).
    """
    step_input = self.item_gru_fc_op(
        layers.concat([independent_item_fc, independent_pos_embed], 1))
    next_hidden = self.item_gru_op(step_input, h_0=independent_hidden)
    return next_hidden, self.hidden_fc_op(next_hidden)
def value(self, obs, act):
    """Q(s, a): the observation is embedded first, then joined with the
    action before the remaining layers.
    """
    obs_feat = self.fc1(obs)
    hidden = self.fc2(layers.concat([obs_feat, act], axis=1))
    return layers.squeeze(self.fc3(hidden), axes=[1])
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """One GRU step that also emits this step's Q scores."""
    step_input = self.item_gru_fc_op(
        layers.concat([independent_item_fc, independent_pos_embed], 1))
    next_hidden = self.item_gru_op(step_input, h_0=independent_hidden)
    # Two-layer Q head on the new hidden state.
    scores = self.out_Q_fc2_op(self.out_Q_fc1_op(next_hidden))
    return next_hidden, scores
def simple_step_rnn(self, item_fc, last_click_embedding, h_0):
    """Advance the item GRU by a single step.

    Equivalent to one step of
    self.train_rnn(item_fc, h_0, output_type='hidden').
    """
    step_input = self.item_gru_fc_op(
        layers.concat([item_fc, last_click_embedding], 1))
    return self.item_gru_op(step_input, h_0=h_0)
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """One GRU step; the score is the positive-class click probability."""
    step_input = self.item_gru_fc_op(
        layers.concat([independent_item_fc, independent_pos_embed], 1))
    next_hidden = self.item_gru_op(step_input, h_0=independent_hidden)
    click_prob = self.out_fc2_op(self.out_fc1_op(next_hidden))
    # Keep only column 1 (the click class) as the ranking score.
    scores = layers.slice(click_prob, axes=[1], starts=[1], ends=[2])
    return next_hidden, scores
def value(self, obs, act):
    """Return scalar Q(s, a).

    State and action are fed jointly: the network takes a single input
    built by concatenating them along the feature axis.
    """
    state_action = layers.concat([obs, act], axis=1)
    q = self.fc2(self.fc1(state_action))
    return layers.squeeze(q, axes=[1])
def _build_embeddings(self, inputs, list_names):
    """Embed each named slot, concatenate along the feature axis, and
    squash the result with softsign.

    Returns a tensor of shape (batch*num_items, concat_dim); each slot
    contributes its own embedding columns.
    """
    slot_embeds = [
        self.dict_data_embed_op[self._get_embed_name(name)](inputs[name])
        for name in list_names
    ]
    fused = layers.concat(input=slot_embeds, axis=1)
    return layers.softsign(fused)
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """Stateless scoring step: the hidden state is returned unchanged
    (no recurrence); the score is the positive-class click probability.
    """
    fused = self.item_concat_fc_op(layers.concat(
        [independent_item_fc, independent_pos_embed, independent_hidden],
        1))
    click_prob = self.out_fc2_op(self.out_fc1_op(fused))
    # Column 1 holds the click probability.
    scores = layers.slice(click_prob, axes=[1], starts=[1], ends=[2])
    return independent_hidden, scores
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """Abstract per-step hook for the sampling RNN.

    Subclasses must override this and return a pair
    (next_hidden, scores).

    Example implementation::

        gru_input = self.item_gru_fc_op(
            layers.concat([independent_item_fc, independent_pos_embed], 1))
        next_hidden = self.item_gru_op(gru_input, independent_hidden)
        scores = self.out_Q_fc2_op(self.out_Q_fc1_op(next_hidden))
        return next_hidden, scores

    Raises:
        NotImplementedError: always; this base implementation is a stub.
    """
    # The original kept the example as statements after the raise, which
    # made them unreachable dead code; it now lives in the docstring.
    raise NotImplementedError()
def value(self, obs, act):
    """Q(s, a) from a 4-layer MLP over the concatenated state-action."""
    state_action = layers.concat([obs, act], axis=1)
    q = self.fc4(self.fc3(self.fc2(self.fc1(state_action))))
    return layers.squeeze(q, axes=[1])
def predict(self, obs):
    """Actor forward pass: split the observation into the main part and
    the trailing velocity part, encode each with its own tower, then
    merge and regress the action means.
    """
    # Assumes obs layout [main features | velocity features], with the
    # last self.vel_obs_dim columns being velocities — confirm upstream.
    main_obs = layers.slice(
        obs, axes=[1], starts=[0],
        ends=[self.obs_dim - self.vel_obs_dim])
    vel_obs = layers.slice(
        obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

    main_feat = self.fc1(self.fc0(main_obs))
    vel_feat = self.vel_fc1(self.vel_fc0(vel_obs))

    merged = layers.concat([main_feat, vel_feat], axis=1)
    return self.fc3(self.fc2(merged))
def _build_embeddings(self, inputs, list_names):
    """Embed each named slot, pooling variable-length ones, then fuse.

    3-D embedding outputs of shape (batch*num_items, None, 16) come
    from sequence-valued slots; they are sum-pooled over the middle
    axis so every slot contributes a fixed-size vector before the
    concat + softsign fusion.
    """
    slot_embeds = []
    for name in list_names:
        embed = self.dict_data_embed_op[self._get_embed_name(name)](
            inputs[name])
        if len(embed.shape) == 3:
            # Sum-pool the variable-length axis down to (·, 16).
            embed = layers.reduce_sum(embed, dim=1)
        slot_embeds.append(embed)
    fused = layers.concat(input=slot_embeds, axis=1)
    return layers.softsign(fused)
def _build_embeddings(self, inputs, list_names):
    """Embed each named slot, concatenate along the feature axis, and
    squash with softsign. Result shape: (batch*seq_lens, concat_dim).
    """
    # Note: slot names index dict_data_embed_op directly here (no
    # _get_embed_name indirection, unlike sibling implementations).
    slot_embeds = [
        self.dict_data_embed_op[name](inputs[name]) for name in list_names
    ]
    fused = layers.concat(input=slot_embeds, axis=1)
    return layers.softsign(fused)
def forward(self, inputs, mode):
    """Bidirectional-GRU click model: encode the user, run forward and
    backward GRUs over the item sequence, and predict click probability.

    `mode` is accepted for interface compatibility but not used here.
    """
    # User encoding initializes both GRU directions.
    user_embedding = self._build_embeddings(inputs, self.user_slot_names)
    user_feature = self.user_feature_fc_op(user_embedding)

    # Item embedding plus positional embedding.
    item_embedding = self._build_embeddings(inputs, self.item_slot_names)
    item_fc = self.item_fc_op(item_embedding)
    pos = fluid_sequence_get_pos(item_fc)
    pos_embed = self.dict_data_embed_op['pos'](pos)

    # Run both GRU directions over the same fused input, then join them.
    gru_input = self.item_gru_fc_op(layers.concat([item_fc, pos_embed], 1))
    forward_states = self.item_gru_forward_op(gru_input, h_0=user_feature)
    backward_states = self.item_gru_backward_op(gru_input, h_0=user_feature)
    item_states = layers.concat([forward_states, backward_states], axis=1)

    return self.out_click_fc2_op(self.out_click_fc1_op(item_states))
def forward(self, inputs):
    """GRU click model: encode the user, roll the GRU over the item
    sequence, and output per-item click probabilities.
    """
    user_feature = self.user_encode(inputs)

    item_embedding = self._build_embeddings(inputs, self.item_slot_names)
    item_fc = self.item_fc_op(item_embedding)
    pos = fluid_sequence_get_pos(item_fc)
    pos_embed = self.dict_data_embed_op['pos'](pos)

    gru_input = self.item_gru_fc_op(layers.concat([item_fc, pos_embed], 1))
    hidden_states = self.item_gru_op(gru_input, h_0=user_feature)
    return self.out_fc2_op(self.out_fc1_op(hidden_states))
def value(self, obs, act):
    """Given state and action, return the scalar Q(s, a) from a
    3-layer MLP over the concatenated [obs, act] input.
    """
    state_action = layers.concat([obs, act], axis=1)
    hidden = self.fc2(self.fc1(state_action))
    return layers.squeeze(self.fc3(hidden), axes=[1])
def predict(self, obs, action):
    """Critic forward pass: encode the main and velocity observation
    slices and the action separately, merge all three, and regress a
    scalar value per batch row.
    """
    # Assumes obs layout [main features | velocity features], with the
    # last self.vel_obs_dim columns being velocities — confirm upstream.
    main_obs = layers.slice(
        obs, axes=[1], starts=[0],
        ends=[self.obs_dim - self.vel_obs_dim])
    vel_obs = layers.slice(
        obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

    main_feat = self.fc1(self.fc0(main_obs))
    vel_feat = self.vel_fc1(self.vel_fc0(vel_obs))
    act_feat = self.act_fc0(action)

    merged = layers.concat([main_feat, act_feat, vel_feat], axis=1)
    out = self.fc3(self.fc2(merged))
    return layers.squeeze(out, axes=[1])
def value(self, hidden, act):
    """Q head: flatten the incoming feature map, join the action, and
    regress a scalar Q per batch row.
    """
    flat_feat = layers.flatten(hidden, axis=1)
    state_action = layers.concat([flat_feat, act], axis=1)
    q = self.fc2(self.fc1(state_action))
    return layers.squeeze(q, axes=[1])