model.setRewardBounds(reward_bounds)
model.setActionBounds(action_bounds)

# Insert the dataset into the replay buffer in shuffled order.
arr = list(range(len(states_)))
random.shuffle(arr)
given_actions = []
given_states = []
for i in range(len(states_)):
    a = actions[arr[i]]
    action_ = np.array([a])
    given_actions.append(action_)
    state_ = np.array([states_[arr[i]]])
    next_state_ = np.array([next_states_[arr[i]]])
    given_states.append(state_)
    # print("Action: " + str([actions[i]]))
    experience.insert(state_, action_, next_state_, np.array([1]))
    # print("Added tuple: ", i)

errors = []
for i in range(settings['rounds']):
    # print("Actions: ", _actions)
    # print("States: ", _states)
    # (error, lossActor) = model.train(_states, _actions, _result_states, _rewards)
    # One critic update per round, on a freshly sampled mini-batch.
    for j in range(1):
        (_states, _actions, _result_states, _rewards, falls_, advantage,
         exp_actions__) = experience.get_batch(batch_size)
        error = model.trainCritic(_states, _actions, _result_states, _rewards)
    # Five actor updates per round, each on a freshly sampled mini-batch.
    for j in range(5):
        (_states, _actions, _result_states, _rewards, falls_, advantage,
         exp_actions__) = experience.get_batch(batch_size)
        # Assumed actor-update counterpart to trainCritic above.
        model.trainActor(_states, _actions, _result_states, _rewards)
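# For reference, a minimal sketch of the kind of uniform-sampling replay
# buffer the insert()/get_batch() calls above assume. The real
# ExperienceMemory in this codebase tracks extra fields (falls, advantages,
# exploration actions), so the class and method bodies here are illustrative
# assumptions, not the framework's implementation.
import random

import numpy as np


class SimpleReplayBuffer(object):
    """Fixed-capacity ring buffer with uniform mini-batch sampling."""

    def __init__(self, capacity):
        self._capacity = capacity
        self._samples = []  # (state, action, next_state, reward) tuples
        self._next = 0      # index of the slot to overwrite next

    def insert(self, state, action, next_state, reward):
        sample = (state, action, next_state, reward)
        if len(self._samples) < self._capacity:
            self._samples.append(sample)
        else:
            self._samples[self._next] = sample  # overwrite the oldest entry
        self._next = (self._next + 1) % self._capacity

    def get_batch(self, batch_size):
        # Sample uniformly without replacement and stack each component
        # into a (batch_size, dim) array.
        batch = random.sample(self._samples, batch_size)
        states, actions, next_states, rewards = (
            np.vstack(column) for column in zip(*batch))
        return states, actions, next_states, rewards

# Usage: buf = SimpleReplayBuffer(capacity); buf.insert(s, a, s2, r)
# repeatedly, then buf.get_batch(batch_size) once enough tuples are stored.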
experience.setRewardBounds(reward_bounds)
experience.setActionBounds(action_bounds)
experience.setSettings(settings)

# Keep a random subset of the dataset in the replay buffer.
arr = list(range(experience_length))
random.shuffle(arr)
num_samples_to_keep = 300
given_actions = []
given_states = []
for i in range(num_samples_to_keep):
    a = actions[arr[i]]
    action_ = np.array([a])
    given_actions.append(action_)
    state_ = np.array([states[arr[i]]])
    given_states.append(state_)
    # print("Action: " + str([actions[i]]))
    experience.insert(state_, state_, action_, np.array([0]))

errors = []
for i in range(1000):
    (_states, _actions, _result_states, _rewards, falls_, _G_ts,
     advantage) = experience.get_batch(batch_size)
    # print("Actions: ", _actions)
    # print("States: ", _states)
    error = model.train(_states, _states, _result_states, _actions)
    errors.append(error)
    # print("Error: " + str(error))

# Evaluate the target function on an evenly spaced grid of states;
# map() must be wrapped in list() under Python 3 before building the array.
states = np.linspace(-5.0, 5.0, experience_length)
actionsNoNoise = np.array(list(map(f, states)))
# print("Eval States: ", np.transpose(np.array([states])))
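# The target function f and the raw states/actions arrays used above are
# defined earlier in the full script. A hypothetical generator with matching
# shapes might look like the sketch below; the target function and noise
# level are assumptions, not values from this repository.
import numpy as np


def make_toy_data(num_samples, noise_std=0.1):
    """Noisy samples of a smooth 1-D target, standing in for the real data."""
    def f(x):
        # Assumed target: any smooth 1-D function works for this test.
        return np.sin(x)
    states = np.linspace(-5.0, 5.0, num_samples)
    actions = f(states) + np.random.normal(0.0, noise_std, num_samples)
    return f, states, actions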
# states2 = np.transpose(np.repeat([states], 2, axis=0))
# print(states2)
model = NeuralNetwork(len(state_bounds[0]), len(action_bounds[0]),
                      state_bounds, action_bounds, settings)
experience = ExperienceMemory(len(state_bounds[0]), len(action_bounds[0]),
                              experience_length, continuous_actions=True)

# Fill the replay buffer with normalized tuples; the "next state" slot holds
# the state itself and the reward slot is fixed at 0.
for i in range(experience_length):
    action_ = np.array([actions[i]])
    state_ = np.array([states[i]])
    # print("Action: " + str([actions[i]]))
    experience.insert(norm_state(state_, state_bounds),
                      norm_action(action_, action_bounds),
                      norm_state(state_, state_bounds),
                      norm_reward(np.array([0]), reward_bounds))

errors = []
for i in range(5000):
    _states, _actions, _result_states, _rewards = experience.get_batch(batch_size)
    # print(_actions)
    error = model.train(_states, _actions)
    errors.append(error)
    # print("Error: " + str(error))

# Compare the learned mapping against the true function on a fresh grid.
states = np.linspace(-1.0, 6.0, experience_length)
actionsNoNoise = np.array(list(map(f, states)))
predicted_actions = np.array(list(map(model.predict, states)))
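# norm_state / norm_action / norm_reward above come from the framework's
# utility module. One plausible implementation, sketched under the assumption
# that bounds are given as [low, high] rows and values are min-max scaled
# into [-1, 1]; the real helpers may differ in detail.
import numpy as np


def norm_value(value, bounds):
    """Scale value into [-1, 1] given per-dimension [low, high] bounds."""
    low = np.array(bounds[0])
    high = np.array(bounds[1])
    middle = (high + low) / 2.0
    half_range = (high - low) / 2.0
    return (value - middle) / half_range


def denorm_value(value, bounds):
    """Inverse of norm_value: map [-1, 1] back to the original range."""
    low = np.array(bounds[0])
    high = np.array(bounds[1])
    return (value * (high - low) / 2.0) + (high + low) / 2.0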