Example #1
def inplace_sub(x, i, v):
  """Applies an inplace sub on input x at index i with value v.

  Note that this function is not actually inplace - it allocates
  a copy of x.  The utility is not avoiding memory copies but rather
  specifying a sparse update.

  If i is None, x and v must be the same shape. Computes
    y = x; y -= v;
  If i is a scalar, x has a rank 1 higher than v's. Computes
    y = x; y[i, :] -= v;
  Otherwise, x and v must have the same rank. Computes
    y = x; y[i, :] -= v;

  Args:
    x: A Tensor.
    i: None, a scalar or a vector.
    v: A Tensor.

  Returns:
    Returns y, which is guaranteed not to be an alias of x.

  """
  return alias_inplace_sub(gen_array_ops.deep_copy(x), i, v)
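The docstring's three index modes are easiest to see with concrete shapes. Below is a minimal usage sketch, assuming this is the function exposed by TensorFlow's internal tensorflow.python.ops.inplace_ops module; the shapes and values are illustrative only:

import tensorflow as tf
from tensorflow.python.ops import inplace_ops

x = tf.ones([3, 4])                   # rank-2 input
i = tf.constant([0, 2])               # vector of row indices into x
v = tf.fill([2, 4], 0.5)              # same rank as x, one row per index

y = inplace_ops.inplace_sub(x, i, v)  # y = x; y[[0, 2], :] -= v
# x is left untouched; y is guaranteed not to alias x.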
Example #2
# Assumes `model` (a Keras model defined earlier) and `env` (a Gym CartPole
# environment) already exist at this point in the script.
import numpy as np
from copy import deepcopy as deep_copy
from tensorflow import keras

model(np.ones([1, 3]))  # build the model's weights with a dummy forward pass
# model = keras.models.load_model("cartpole_model_with_TD(lambda)_60000steps.h5")
optimizer = keras.optimizers.RMSprop(learning_rate=0.01,
                                     momentum=0.95,
                                     rho=0.95)
model.summary()
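The training loop below calls a greedy_action helper that is not part of this excerpt. A minimal sketch, assuming the model maps a batch of states to one Q-value per action (this helper is hypothetical, not the author's actual implementation):

def greedy_action(model, state):
    # Hypothetical sketch: add a batch dimension, score every action,
    # and return the index of the highest predicted Q-value.
    q_values = model(np.asarray(state, dtype=np.float32)[None, :])
    return int(np.argmax(q_values[0]))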

programming_times = 10
replay_memory_length = 1000   # maximum number of transitions kept in the buffer
replay_memory = []
gamma = 0.99                  # discount factor
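How the replay_memory_length cap is enforced is not shown in this excerpt; a common FIFO pattern, offered only as a hedged sketch with hypothetical transition fields, is kept commented out here so the script above still runs as-is:

# replay_memory.append((state, action, reward, state_next, done))
# if len(replay_memory) > replay_memory_length:
#     replay_memory.pop(0)   # discard the oldest transition (FIFO)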

# Snapshot the current trainable weights before training begins
old_theta = []
for each in model.trainable_variables:
    old_theta.append(deep_copy(each))

max_episode_num = 10
for episode in range(max_episode_num):

    print(f'Episode: {episode+1}')
    t = 0
    state = env.reset()
    step = 0
    rewards = 0

    while True:

        # Take the greedy action, step the environment, and collect the result
        action = greedy_action(model, state)
        state_next, reward, done, _ = env.step(action)
        if done and step < 300: