def value(self, obs):
    """Compute Q-values for a batch of image observations.

    Scales pixels to [0, 1], runs four conv stages (max-pool after the
    first three), flattens, then applies either the Dueling head or a
    single fully-connected head depending on ``self.algo``.
    """
    scaled = obs / 255.0
    feat = self.conv1(scaled)
    feat = layers.pool2d(input=feat, pool_size=2, pool_stride=2, pool_type='max')
    feat = self.conv2(feat)
    feat = layers.pool2d(input=feat, pool_size=2, pool_stride=2, pool_type='max')
    feat = self.conv3(feat)
    feat = layers.pool2d(input=feat, pool_size=2, pool_stride=2, pool_type='max')
    feat = self.conv4(feat)
    feat = layers.flatten(feat, axis=1)
    if self.algo == 'Dueling':
        # Dueling DQN: Q = A + (V - mean(A)), mean-centering A so the
        # value/advantage decomposition is identifiable.
        advantage = self.fc2_adv(self.fc1_adv(feat))
        state_value = self.fc2_val(self.fc1_val(feat))
        return advantage + (
            state_value - layers.reduce_mean(advantage, dim=1, keep_dim=True))
    return self.fc1(feat)
def value(self, obs):
    """Return the state value V(s) as a tensor of shape [B]."""
    flat = layers.flatten(obs, axis=1)
    h1 = self.fc1(flat)
    h2 = self.fc2(h1)
    v = self.value_fc(h2)
    # Drop the trailing singleton dim so callers get a 1-D batch of values.
    return layers.squeeze(v, axes=[1])
def value(self, obs):
    """Forward pass: three conv stages, flatten, then an FC head of Q-values."""
    x = self.cc1(obs)
    x = self.cc2(x)
    x = self.cc3(x)
    x = layers.flatten(x, axis=1)
    return self.fc1(x)
def value(self, obs):
    """Normalize pixel input to [0, 1] and map it through conv + FC layers."""
    x = obs / 255.0  # raw pixel values -> [0, 1]
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    x = layers.flatten(x, axis=1)
    x = self.fc0(x)
    return self.fc1(x)
def policy(self, obs):
    """Return (means, log_std) of a Gaussian policy.

    log_std is clipped into [LOG_SIG_MIN, LOG_SIG_MAX] to keep the
    standard deviation numerically well-behaved.
    """
    flat = layers.flatten(obs, axis=1)
    hidden = self.fc2(self.fc1(flat))
    means = self.mean_linear(hidden)
    log_std = self.log_std_linear(hidden)
    log_std = layers.clip(log_std, min=LOG_SIG_MIN, max=LOG_SIG_MAX)
    return means, log_std
def obs_ae(self, obs):
    """Autoencode an image observation: conv encoder + FC bottleneck, then decoder.

    NOTE(review): the reshape uses conv3's static ``.shape`` including the
    batch dimension, so this appears to assume a fixed batch size — confirm
    with callers.
    """
    x = obs / 255.0
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    encoded_shape = x.shape  # remembered so the decoder can un-flatten
    latent = self.fc(layers.flatten(x, axis=1))
    out = layers.reshape(self.decoder[0](latent), encoded_shape)
    for decoder_layer in self.decoder[1:]:
        out = decoder_layer(out)
    return out
def value(self, hidden, act):
    """Q(s, a): flatten the state features, concatenate the action, run two FCs.

    Returns a tensor of shape [B] (trailing singleton dim squeezed away).
    """
    state_flat = layers.flatten(hidden, axis=1)
    joined = layers.concat([state_flat, act], axis=1)
    q = self.fc2(self.fc1(joined))
    return layers.squeeze(q, axes=[1])
def policy(self, obs):
    """Return policy logits of shape [B, ACT_DIM].

    Args:
        obs: float32 tensor of shape [B, C, H, W]; pixels are scaled to [0, 1].
    """
    x = obs / 255.0
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    # Fully-convolutional head; flattening its output yields the logits.
    x = self.policy_conv(x)
    return layers.flatten(x, axis=1)
def value(self, obs):
    """Return state values of shape [B].

    Args:
        obs: float32 tensor of shape [B, C, H, W]; pixels are scaled to [0, 1].
    """
    x = obs / 255.0
    x = self.conv3(self.conv2(self.conv1(x)))
    v = self.value_fc(layers.flatten(x, axis=1))
    return layers.squeeze(v, axes=[1])
def value(self, obs):
    """Dueling-DQN Q-values: Q = A + (V - mean(A)).

    Normalizes pixel input to [0, 1], extracts conv + FC features, then
    combines the value and advantage streams.
    """
    out = obs / 255.0  # raw pixel values -> [0, 1]
    out = self.conv1(out)
    out = self.conv2(out)
    out = self.conv3(out)
    out = layers.flatten(out, axis=1)
    out = self.fc0(out)
    out = self.fc1(out)
    V = self.valueFc(out)
    advantage = self.advantageFc(out)
    # Subtract the per-sample advantage mean so V and A are identifiable.
    # Consistency fix: use the module-level `layers` alias (as the other
    # value() methods do) instead of reaching through `fluid.layers`.
    advMean = layers.reduce_mean(advantage, dim=1, keep_dim=True)
    Q = advantage + (V - advMean)
    return Q
def value(self, obs):
    """Q-values from an NHWC image: two conv + max-pool stages, flatten, FC."""
    x = obs / 255.0
    x = self.conv1(x)
    x = layers.pool2d(input=x, pool_size=2, pool_stride=2,
                      pool_type='max', data_format="NHWC")
    x = self.conv2(x)
    x = layers.pool2d(input=x, pool_size=2, pool_stride=2,
                      pool_type='max', data_format="NHWC")
    return self.fc1(layers.flatten(x, axis=1))
def policy(self, obs):
    """Return policy logits of shape [B, ACT_DIM].

    Args:
        obs: float32 tensor of shape [B, C, H, W]; pixels are scaled to [0, 1].
    """
    x = obs / 255.0
    x = self.conv3(self.conv2(self.conv1(x)))
    hidden = self.fc(layers.flatten(x, axis=1))
    return self.policy_fc(hidden)
def policy(self, obs):
    """Return policy logits of shape [N, ACTION_DIM].

    Args:
        obs: input image tensor of shape [N, C, H, W].
    """
    x = self.conv1(obs)
    x = self.conv2(x)
    x = self.conv3(x)
    x = self.conv4(x)
    hidden = self.fc(layers.flatten(x, axis=1))
    return self.policy_fc(hidden)
def value(self, obs):
    """Return state values of shape [N].

    Args:
        obs: input image tensor of shape [N, C, H, W].
    """
    x = self.conv4(self.conv3(self.conv2(self.conv1(obs))))
    v = self.value_fc(self.fc(layers.flatten(x, axis=1)))
    return layers.squeeze(v, axes=[1])
def value(self, obs):
    """Q-values: normalize pixels, four conv stages (max-pool after the
    first three), flatten, FC head."""
    x = obs / 255.0
    for conv in (self.conv1, self.conv2, self.conv3):
        x = layers.pool2d(
            input=conv(x), pool_size=2, pool_stride=2, pool_type='max')
    x = self.conv4(x)
    return self.fc1(layers.flatten(x, axis=1))
def value(self, obs):
    """Q-values from four conv stages (max-pool after the first three) and an FC head.

    NOTE(review): unlike sibling value() methods, ``obs`` is not divided
    by 255 here — confirm the caller already normalizes.
    """
    x = self.conv1(obs)
    x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
    x = self.conv2(x)
    x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
    x = self.conv3(x)
    x = layers.pool2d(input=x, pool_size=2, pool_stride=2, pool_type='max')
    x = self.conv4(x)
    return self.fc1(layers.flatten(x, axis=1))
def policy_and_value(self, obs):
    """Shared-trunk actor-critic forward pass.

    Args:
        obs: float32 tensor of shape [B, C, H, W]; pixels are scaled to [0, 1].

    Returns:
        Tuple of (policy_logits [B, ACT_DIM], values [B]) computed from a
        common FC feature.
    """
    x = obs / 255.0
    x = self.conv3(self.conv2(self.conv1(x)))
    shared = self.fc(layers.flatten(x, axis=1))
    logits = self.policy_fc(shared)
    vals = layers.squeeze(self.value_fc(shared), axes=[1])
    return logits, vals
def value(self, obs):
    """Forward pass producing a concatenated [start | end | num] vector.

    One conv + max-pool stage, flatten, then three FC heads: ``start``
    and ``end`` are softmax distributions, ``num`` is raw FC output.
    """
    # Fix: removed a leftover `print('value', obs.shape)` debug statement
    # and the blocks of commented-out conv/pool stages.
    out = self.conv1(obs)
    out = layers.pool2d(
        input=out, pool_size=4, pool_stride=2, pool_type='max')
    out = layers.flatten(out, axis=1)
    start = paddle.fluid.layers.softmax(self.fc1(out))
    end = paddle.fluid.layers.softmax(self.fc2(out))
    num = self.fc3(out)
    return layers.concat([start, end, num], axis=1)
def policy_and_value(self, obs):
    """Shared-trunk actor-critic forward pass.

    Args:
        obs: input image tensor of shape [N, C, H, W].

    Returns:
        Tuple of (policy_logits [N, ACTION_DIM], values [N]) computed from
        a common FC feature.
    """
    x = self.conv4(self.conv3(self.conv2(self.conv1(obs))))
    shared = self.fc(layers.flatten(x, axis=1))
    logits = self.policy_fc(shared)
    vals = layers.squeeze(self.value_fc(shared), axes=[1])
    return logits, vals