def predict(self, obs):
    """Run the forward pass on ``obs`` and return the ``fc3`` output.

    ``obs`` is split along axis 1 into two parts: the leading columns up to
    ``self.obs_dim - self.vel_obs_dim`` and the trailing
    ``self.vel_obs_dim`` columns. Each part goes through its own pair of
    layers; the two results are concatenated along axis 1 and passed through
    ``fc2`` and ``fc3``.

    Args:
        obs: Observation tensor. Presumably shaped (batch, self.obs_dim) --
            TODO confirm against the caller.

    Returns:
        The output of ``self.fc3`` (called ``means`` by the original author).
    """
    vel_width = self.vel_obs_dim
    leading_obs = layers.slice(
        obs, axes=[1], starts=[0], ends=[self.obs_dim - vel_width])
    trailing_obs = layers.slice(
        obs, axes=[1], starts=[-vel_width], ends=[self.obs_dim])

    # Each slice has its own two-layer stack before the branches are merged.
    leading_feat = self.fc1(self.fc0(leading_obs))
    trailing_feat = self.vel_fc1(self.vel_fc0(trailing_obs))

    merged = layers.concat([leading_feat, trailing_feat], axis=1)
    return self.fc3(self.fc2(merged))
def predict(self, obs, action):
    """Run the forward pass on ``(obs, action)`` and return a squeezed value.

    ``obs`` is split along axis 1 into the leading columns (up to
    ``self.obs_dim - self.vel_obs_dim``) and the trailing
    ``self.vel_obs_dim`` columns. Both parts and ``action`` are embedded by
    their own layers, concatenated along axis 1 in the order
    (leading, action, trailing), and passed through ``fc2`` and ``fc3``.

    Args:
        obs: Observation tensor. Presumably shaped (batch, self.obs_dim) --
            TODO confirm against the caller.
        action: Action tensor fed to ``self.act_fc0``.

    Returns:
        The output of ``self.fc3`` with axis 1 squeezed out.
    """
    vel_width = self.vel_obs_dim
    leading_obs = layers.slice(
        obs, axes=[1], starts=[0], ends=[self.obs_dim - vel_width])
    trailing_obs = layers.slice(
        obs, axes=[1], starts=[-vel_width], ends=[self.obs_dim])

    leading_feat = self.fc1(self.fc0(leading_obs))
    trailing_feat = self.vel_fc1(self.vel_fc0(trailing_obs))
    action_feat = self.act_fc0(action)

    merged = layers.concat(
        [leading_feat, action_feat, trailing_feat], axis=1)
    value_out = self.fc3(self.fc2(merged))
    return layers.squeeze(value_out, axes=[1])
def dynamic_rnn(self, item_fc, h_0, output_type=None, double_type=None,
                double_id=None):
    """Unroll ``simple_step_rnn`` over ``item_fc`` with a ``DynamicRNN``.

    At each step the current item is fed to ``self.simple_step_rnn`` together
    with the hidden state carried in RNN memory (initialised from ``h_0``).
    What is emitted per step depends on ``output_type``.

    Args:
        item_fc: Sequence input stepped over by the RNN. It is also used as
            a static input in the ``'max_Q'`` / ``'double_Q'`` modes.
        h_0: Initial hidden state for the RNN memory.
        output_type: One of

            * ``'c_Q'``: output ``out_Q_fc2_op(out_Q_fc1_op(next_h_0))``.
            * ``'max_Q'``: score every item of the sequence from the current
              hidden state, mask the scores with ``pos >= current step``,
              and output the per-sequence max.
            * ``'double_Q'``: compute the same masked scores, then output
              according to ``double_type``.
            * ``'hidden'``: output the next hidden state.
        double_type: Only read when ``output_type == 'double_Q'``.
            ``'max_id'`` outputs ``self.eps_greedy_sampling(..., eps=0)``;
            ``'double_Q'`` outputs the scores indexed by ``double_id``.
        double_id: Sequence of indices, stepped over when
            ``double_type == 'double_Q'``.

    Returns:
        The value returned by calling the ``DynamicRNN`` object after its
        block has been built.

    Raises:
        NotImplementedError: If ``output_type`` is not one of the values
            above, or if ``output_type == 'double_Q'`` and ``double_type``
            is neither ``'max_id'`` nor ``'double_Q'``.
    """
    drnn = fluid.layers.DynamicRNN()
    pos = fluid_sequence_get_pos(item_fc)
    with drnn.block():
        cur_item_fc = drnn.step_input(item_fc)
        cur_h_0 = drnn.memory(init=h_0, need_reorder=True)

        # Give the step input the same lod as the hidden state before the
        # recurrent step.
        cur_item_fc = layers.lod_reset(cur_item_fc, cur_h_0)
        next_h_0 = self.simple_step_rnn(cur_item_fc, h_0=cur_h_0)

        if output_type == 'c_Q':
            Q = self.out_Q_fc2_op(self.out_Q_fc1_op(next_h_0))
            drnn.output(Q)

        elif output_type in ['max_Q', 'double_Q']:
            # Example from the original author:
            # batch_size = 2
            # item_fc: lod = [0,4,7]
            # cur_h_0: lod = [0,1,2]
            item_fc = drnn.static_input(item_fc)
            pos = drnn.static_input(pos)
            # Step counter kept in RNN memory, starting at 0.
            cur_step = drnn.memory(shape=[1], dtype='int64', value=0)

            expand_h_0 = layers.sequence_expand(
                cur_h_0, item_fc)  # lod = [0,1,2,3,4,5,6,7]
            new_item_fc = layers.lod_reset(
                item_fc, expand_h_0)  # lod = [0,1,2,3,4,5,6,7]
            next_expand_h_0 = self.simple_step_rnn(
                new_item_fc, expand_h_0)  # lod = [0,1,2,3,4,5,6,7]
            next_expand_h_0 = layers.lod_reset(
                next_expand_h_0, item_fc)  # lod = [0,4,7]

            expand_Q = self.out_Q_fc2_op(self.out_Q_fc1_op(next_expand_h_0))
            cur_step_id = layers.slice(
                cur_step, axes=[0, 1], starts=[0, 0], ends=[1, 1])
            # 1.0 where pos >= current step, 0.0 elsewhere; the scores of
            # the other entries are zeroed.
            mask = layers.cast(pos >= cur_step_id, 'float32')
            expand_Q = expand_Q * mask

            if output_type == 'max_Q':
                max_Q = layers.sequence_pool(expand_Q, 'max')  # lod = [0,1,2]
                drnn.output(max_Q)
            elif output_type == 'double_Q':
                if double_type == 'max_id':
                    max_id = self.eps_greedy_sampling(expand_Q, mask, eps=0)
                    drnn.output(max_id)
                elif double_type == 'double_Q':
                    cur_double_id = drnn.step_input(double_id)
                    double_Q = fluid_sequence_index(expand_Q, cur_double_id)
                    drnn.output(double_Q)
                else:
                    # Previously this case fell through without registering
                    # any RNN output; fail loudly, as for unknown
                    # output_type.
                    raise NotImplementedError(double_type)

            # update
            next_step = cur_step + 1
            drnn.update_memory(cur_step, next_step)

        elif output_type == 'hidden':
            drnn.output(next_h_0)

        else:
            raise NotImplementedError(output_type)

        # update
        drnn.update_memory(cur_h_0, next_h_0)

    drnn_output = drnn()
    return drnn_output
def value(self, obs, action):
    """Compute ``Q`` for a batch of ``(obs, action)`` pairs.

    ``obs`` is split along axis 1 into everything except the last
    ``self.vel_obs_dim`` columns and those last columns. Both parts and
    ``action`` are embedded by their own layers, concatenated along axis 1
    in the order (leading, action, trailing), and passed through ``fc2``
    and ``fc3``.

    Args:
        obs: Observation tensor. Presumably shaped (batch, self.obs_dim) --
            TODO confirm against the caller.
        action: Action tensor fed to ``self.act_fc0``.

    Returns:
        The output of ``self.fc3`` with axis 1 squeezed out.
    """
    vel_width = self.vel_obs_dim
    leading_obs = layers.slice(
        obs, axes=[1], starts=[0], ends=[-vel_width])
    # Target-related features (per the original author's note).
    trailing_obs = layers.slice(
        obs, axes=[1], starts=[-vel_width], ends=[self.obs_dim])

    leading_feat = self.fc1(self.fc0(leading_obs))
    trailing_feat = self.vel_fc1(self.vel_fc0(trailing_obs))
    action_feat = self.act_fc0(action)

    merged = layers.concat(
        [leading_feat, action_feat, trailing_feat], axis=1)
    q_out = self.fc3(self.fc2(merged))
    return layers.squeeze(q_out, axes=[1])
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """Apply one GRU step and score the result.

    The item features and position embedding are concatenated along axis 1,
    projected by ``item_gru_fc_op`` and fed to ``item_gru_op`` with
    ``independent_hidden`` as the initial state. The GRU output then goes
    through ``out_fc1_op`` and ``out_fc2_op``.

    Args:
        independent_item_fc: Item feature tensor.
        independent_hidden: Hidden state passed to the GRU as ``h_0``.
        independent_pos_embed: Position embedding tensor.

    Returns:
        A tuple ``(item_gru, scores)``: the GRU output, and column 1 (the
        slice ``[1, 2)`` on axis 1) of the ``out_fc2_op`` output.
    """
    item_with_pos = layers.concat(
        [independent_item_fc, independent_pos_embed], 1)
    gru_in = self.item_gru_fc_op(item_with_pos)
    gru_out = self.item_gru_op(gru_in, h_0=independent_hidden)

    head_out = self.out_fc2_op(self.out_fc1_op(gru_out))
    column_one = layers.slice(head_out, axes=[1], starts=[1], ends=[2])
    return gru_out, column_one
def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                         independent_pos_embed):
    """Score items from their features, position embedding and hidden state.

    The three inputs are concatenated along axis 1 in the order
    (item, position, hidden), projected by ``item_concat_fc_op`` and passed
    through ``out_fc1_op`` and ``out_fc2_op``. The hidden state itself is not
    updated here.

    Args:
        independent_item_fc: Item feature tensor.
        independent_hidden: Hidden state tensor; returned unchanged.
        independent_pos_embed: Position embedding tensor.

    Returns:
        A tuple ``(independent_hidden, scores)`` where ``scores`` is column 1
        (the slice ``[1, 2)`` on axis 1) of the ``out_fc2_op`` output.
    """
    features = [independent_item_fc, independent_pos_embed, independent_hidden]
    projected = self.item_concat_fc_op(layers.concat(features, 1))

    head_out = self.out_fc2_op(self.out_fc1_op(projected))
    column_one = layers.slice(head_out, axes=[1], starts=[1], ends=[2])
    return independent_hidden, column_one
def train_rnn(self, item_fc, atten_item_fc, h_0, pos, pos_embed,
              output_type=''):
    """Unroll ``sampling_rnn_forward`` over a shifted item sequence.

    ``item_fc`` is first passed through ``fluid_sequence_advance(OOV=0)``;
    the result and ``pos_embed`` are stepped through a ``DynamicRNN`` whose
    memory is initialised from ``h_0``.

    Args:
        item_fc: Item feature sequence (shifted before being stepped over).
        atten_item_fc: Item features combined with the per-step hidden
            output to produce Q values.
        h_0: Initial hidden state for the RNN memory.
        pos: Position sequence; only read when ``output_type == 'max_Q'``.
        pos_embed: Position embedding sequence, stepped with the items.
        output_type: ``'c_Q'`` outputs, per step, the sum over axis 1 of the
            elementwise product of the step's hidden output and the step's
            ``atten_item_fc``. ``'max_Q'`` outputs the per-sequence max of
            ``self._dot_attention`` scores after masking with
            ``pos >= current step position``.

    Returns:
        The value returned by calling the ``DynamicRNN`` object.

    Raises:
        NotImplementedError: For any other ``output_type`` (including the
            default ``''``).
    """
    advanced_item_fc = fluid_sequence_advance(item_fc, OOV=0)
    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        step_item_fc = rnn.step_input(advanced_item_fc)
        step_pos_embed = rnn.step_input(pos_embed)
        prev_hidden = rnn.memory(init=h_0, need_reorder=True)

        # Original author's note: step_input removes lod info, so it is
        # restored from the hidden state here.
        step_item_fc = layers.lod_reset(step_item_fc, prev_hidden)
        step_pos_embed = layers.lod_reset(step_pos_embed, prev_hidden)

        next_hidden, step_hidden_fc = self.sampling_rnn_forward(
            step_item_fc, prev_hidden, step_pos_embed)

        if output_type == 'c_Q':
            step_atten_fc = rnn.step_input(atten_item_fc)
            step_atten_fc = layers.lod_reset(step_atten_fc, prev_hidden)
            chosen_q = layers.reduce_sum(
                step_hidden_fc * step_atten_fc, dim=1, keep_dim=True)
            rnn.output(chosen_q)
        elif output_type == 'max_Q':
            step_pos = rnn.step_input(pos)
            all_pos = rnn.static_input(pos)
            all_atten_fc = rnn.static_input(atten_item_fc)

            candidate_q = self._dot_attention(step_hidden_fc, all_atten_fc)
            step_id = layers.slice(
                step_pos, axes=[0, 1], starts=[0, 0], ends=[1, 1])
            # 1.0 where pos >= current step position, 0.0 elsewhere.
            keep_mask = layers.cast(all_pos >= step_id, 'float32')
            candidate_q = candidate_q * keep_mask
            rnn.output(layers.sequence_pool(candidate_q, 'max'))
        else:
            raise NotImplementedError(output_type)

        # Carry the new hidden state into the next step.
        rnn.update_memory(prev_hidden, next_hidden)

    return rnn()
def train_rnn(self, item_fc, h_0, pos, pos_embed, output_type=''):
    """Unroll ``sampling_rnn_forward`` over ``item_fc`` with a ``DynamicRNN``.

    ``item_fc`` and ``pos_embed`` are stepped through the RNN, whose memory
    is initialised from ``h_0``.

    Args:
        item_fc: Item feature sequence.
        h_0: Initial hidden state for the RNN memory.
        pos: Position sequence; only read when ``output_type == 'max_Q'``.
        pos_embed: Position embedding sequence, stepped with the items.
        output_type: ``'c_Q'`` outputs the score that
            ``sampling_rnn_forward`` returns for the current step.
            ``'max_Q'`` re-scores every item of the sequence from the
            current hidden state and position embedding, masks the scores
            with ``pos >= current step position``, and outputs the
            per-sequence max.

    Returns:
        The value returned by calling the ``DynamicRNN`` object.

    Raises:
        NotImplementedError: For any other ``output_type`` (including the
            default ``''``).
    """
    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        step_item_fc = rnn.step_input(item_fc)
        step_pos_embed = rnn.step_input(pos_embed)
        prev_hidden = rnn.memory(init=h_0, need_reorder=True)

        # Original author's note: step_input removes lod info, so it is
        # restored from the hidden state here.
        step_item_fc = layers.lod_reset(step_item_fc, prev_hidden)
        step_pos_embed = layers.lod_reset(step_pos_embed, prev_hidden)

        next_hidden, step_q = self.sampling_rnn_forward(
            step_item_fc, prev_hidden, step_pos_embed)

        if output_type == 'c_Q':
            rnn.output(step_q)
        elif output_type == 'max_Q':
            # Original author's example: batch_size = 2,
            # prev_hidden lod = [0,1,2], static inputs lod = [0,4,7].
            step_pos = rnn.step_input(pos)
            all_pos = rnn.static_input(pos)
            all_item_fc = rnn.static_input(item_fc)

            # Expand the per-sequence state against the full item list
            # (example lod after expansion: [0,1,2,3,4,5,6,7]).
            tiled_hidden = layers.sequence_expand(prev_hidden, all_item_fc)
            tiled_pos_embed = layers.sequence_expand(
                step_pos_embed, all_item_fc)
            flat_item_fc = layers.lod_reset(all_item_fc, tiled_hidden)

            _, flat_scores = self.sampling_rnn_forward(
                flat_item_fc, tiled_hidden, tiled_pos_embed)

            # Restore the per-sequence lod (example: [0,4,7]).
            candidate_q = layers.lod_reset(flat_scores, all_item_fc)
            step_id = layers.slice(
                step_pos, axes=[0, 1], starts=[0, 0], ends=[1, 1])
            # 1.0 where pos >= current step position, 0.0 elsewhere.
            keep_mask = layers.cast(all_pos >= step_id, 'float32')
            candidate_q = candidate_q * keep_mask
            # Example lod of the pooled result: [0,1,2].
            rnn.output(layers.sequence_pool(candidate_q, 'max'))
        else:
            raise NotImplementedError(output_type)

        # Carry the new hidden state into the next step.
        rnn.update_memory(prev_hidden, next_hidden)

    return rnn()