Example #1
                    "--memory-size",
                    dest="memory_size",
                    default=5000,
                    type=int,
                    help='Size of the replay memory (default: 5000)')
args = parser.parse_args()

start_date = utils.format(args.start_date)
end_date = utils.format(args.end_date)

env = Env(start_date, end_date)
agent = Agent(env.actions, len(env.columns), env.state_size, args.memory_size)

if args.load:
    print("[Agent] load model")
    agent.load_model()

terminal = False
n_epochs = args.n_epochs
loops = -1
e = 0
total_frame = 0
do_replay_count = 0
max_step = 0
start_replay = False

optimistic = 3
optimistic_num = len(env.actions) * optimistic


def optimistic_action(env, epoch, optimistic):
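
The snippet above starts mid-way through the argparse setup and breaks off at the optimistic_action definition, whose body is not shown. As a rough, hypothetical sketch of what an optimistic action selector like this might do (the body below is an assumption, not the original code), each action could be forced in round-robin order until every action has been tried optimistic times, after which exploration falls back to a random choice:

import random

def optimistic_action(env, epoch, optimistic):
    # Hedged sketch, not the original body: assumes env.actions is an
    # indexable sequence of action ids.
    optimistic_num = len(env.actions) * optimistic
    if epoch < optimistic_num:
        # round-robin so every action is sampled the same number of times early on
        return env.actions[epoch % len(env.actions)]
    # after the optimistic phase, explore at random
    return random.choice(env.actions)
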
Example #2
from dqn_agent import ReplayBuffer
from dqn_agent import Agent


LOAD_PREV_MODEL = False
SET_EPS = False
MODEL_FILE_NAME = "MODEL_CHECKPOINT.34994.model"

max_games = 1000
max_steps = 1000

my_agent = Agent(37, action_size, 0)

if LOAD_PREV_MODEL:
    my_agent.load_model(MODEL_FILE_NAME, 37, action_size, 0)

#my_buffer = ReplayBuffer(4, 1000, 10, 0)
epsilon = 0.0
min_eps = 0.1
all_scores = []

#env_info = env.reset(train_mode=False)[brain_name] # reset the environment
#state = env_info.vector_observations[0]            # get the current state
#score = 0           # initialize the score

for idx_games in range(1, max_games + 1):  # play max_games episodes in total
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]  # get the current state
    score = 0  # initialize the score
    print("GAME NUMBER: " + str(idx_games))
Example #3
def main(limit_episode, limit_step, seed=0, load=False):
    rospy.init_node('dqn_pendulum_myrrbot7')

    pub = rospy.Publisher("/myrrbot7/joint1_cotroller/command",
                          Float64,
                          queue_size=1)

    loop_rate = rospy.Rate(hz)

    result_path = "/home/amsl/ros_catkin_ws/src/deep_actor_critic/actor_critic_for_swingup/test_results/results/dqn_result.txt"
    model_path = "/home/amsl/ros_catkin_ws/src/deep_actor_critic/actor_critic_for_swingup/test_results/models/dqn_"

    init_theta = 0.0
    init_omega = 0.0

    state = get_state(init_theta, init_omega)
    state_dash = get_state(init_theta, init_omega)
    action = 0.0
    reward = 0.0
    action_list = [
        np.array([a], dtype=np.float64) for a in [min_torque, max_torque]
    ]
    n_st = len(state[0])
    n_act = len(action_list)

    Q_list = np.array([])
    max_Q = 0.0
    ave_Q = 0.0
    reward_list = np.array([])
    ave_reward = 0.0
    total_reward = 0.0

    temp_result = np.array([[]])
    test_result = np.array([[]])

    evaluation_flag = False
    wait_flag = True

    agent = Agent(n_st, n_act, seed)

    if load:
        agent.load_model(model_path)

    episode_count = 0
    time = 1
    count = 0
    wait_count = 0

    while not rospy.is_shutdown():
        if wait_flag:
            wait_count += 1
            #  print "wait_count : ", wait_count

            state = get_state(init_theta, init_omega)
            state_dash = get_state(init_theta, init_omega)
            reset_client(init_theta)
            action = 0.0
            pub.publish(action)

            if wait_count == 0.5 * hz:
                init_theta = uniform(-1 * math.pi, math.pi)

            if wait_count % hz == 0:
                wait_count = 0
                wait_flag = False
                #  print "Please Wait 1 second"
                #  print "state : ", state
                #  print "state_dash : ", state_dash
        else:
            if not evaluation_flag:
                #  print "Now Learning!!!!!"
                #  print "episode : ", episode_count
                #  print "time : ", time
                #  print "state : ", state
                act_i, q = agent.get_action(state, False)
                Q_list = np.append(Q_list, [q])
                #  print "act_i : ", act_i
                action = action_list[act_i]
                #  print "action : ", action
                pub.publish(action)
                theta, omega = get_joint_properties('myrrbot7_joint1')
                #  print "theta : %f, omega : %f" % (theta, omega)
                state_dash = get_state(theta, omega)
                #  print "state_dash : ", state_dash
                reward = get_reward(theta, omega, action)
                reward_list = np.append(reward_list, [reward])
                #  print "reward : ", reward
                ep_end = get_ep_end(theta, time, limit_step)
                #  print "ep_end : ", ep_end
                agent.stock_experience(count, state, act_i, reward, state_dash,
                                       ep_end)
                agent.train(count)

                time += 1
                count += 1

                if ep_end:
                    max_Q = np.max(Q_list)
                    ave_Q = np.average(Q_list)
                    #  print "max_Q : ", max_Q
                    #  print "ave_Q : ",ave_Q
                    ave_reward = np.average(reward_list)
                    total_reward = np.sum(reward_list)
                    #  print "ave_reward : ", ave_reward
                    #  print "total_reward : ", total_reward

                    print "Episode : %d\t/Reward Sum : %f\tEpsilon : %f\tLoss : %f\t/Average Q : %f\t/Time Step : %d" % (
                        episode_count, total_reward, agent.epsilon, agent.loss,
                        np.sum(Q_list) / float(time), agent.step + 1)
                    Q_list = np.array([])
                    reward_list = np.array([])
                    temp_result = np.array(
                        [[episode_count, max_Q, ave_Q, ave_reward, total_reward]],
                        dtype=np.float32)

                    if episode_count == 0:
                        #  print "test_result : ", test_result
                        test_result = temp_result
                        #  print "test_result : ", test_result
                    else:
                        test_result = np.r_[test_result, temp_result]

                    save_result(result_path, test_result)
                    agent.save_model(model_path)

                    if episode_count % 1 == 0:
                        evaluation_flag = False

                    episode_count += 1
                    time = 0
                    wait_flag = True
            else:
                #  print "Now evaluation!!!"
                #  print "episode : ", episode_count-1
                #  print "time : ", time
                #  print "state : ", state
                act_i, q = agent.get_action(state, True)
                Q_list = np.append(Q_list, [q])
                #  print "act_i : ", act_i
                #  print "Q_list : ", Q_list
                action = action_list[act_i]
                #  print "action : ", action
                pub.publish(action)
                theta, omega = get_joint_properties('myrrbot7_joint1')
                #  print "theta : %f, omega : %f" % (theta, omega)
                state_dash = get_state(theta, omega)
                #  print "state_dash : ", state_dash
                reward = get_reward(theta, omega, action)
                #  print "reward : ", reward
                reward_list = np.append(reward_list, [reward])
                #  print "reward_list : ", reward_list
                ep_end = get_ep_end(theta, time, limit_step)
                #  print "ep_end : ", ep_end
                if ep_end:
                    max_Q = np.max(Q_list)
                    ave_Q = np.average(Q_list)
                    #  print "max_Q : ", max_Q
                    #  print "ave_Q : ",ave_Q
                    ave_reward = np.average(reward_list)
                    total_reward = np.sum(reward_list)
                    #  print "ave_reward : ", ave_reward
                    #  print "total_reward : ", total_reward

                    print "Episode : %d\t/Reward Sum : %f\tEpsilon : %f\tLoss : %f\t/Average Q : %f\t/Time Step : %d" % (
                        episode_count - 1, total_reward, agent.epsilon,
                        agent.loss, np.sum(Q_list) / float(time + 1),
                        agent.step)
                    Q_list = np.array([])
                    reward_list = np.array([])

                    time = 0
                    wait_flag = True
                    evaluation_flag = False

                    temp_result = np.array(
                        [[episode_count - 1, max_Q, ave_Q, ave_reward, total_reward]],
                        dtype=np.float32)

                    if episode_count - 1 == 0:
                        #  print "test_result : ", test_result
                        test_result = temp_result
                        #  print "test_result : ", test_result
                    else:
                        test_result = np.r_[test_result, temp_result]

                    save_result(result_path, test_result)
                    agent.save_model(model_path)

                time += 1

        loop_rate.sleep()
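
Example #3 drives the agent entirely through a small implicit interface. The stub below is a hedged reconstruction of that interface, inferred only from the calls and attributes used above (get_action, stock_experience, train, save_model, load_model, and the epsilon/loss/step fields read by the logging line); the bodies are placeholders, not the original implementation:

class Agent(object):
    """Interface stub inferred from Example #3; method bodies are placeholders."""

    def __init__(self, n_st, n_act, seed):
        self.epsilon = 1.0  # exploration rate, logged at the end of each episode
        self.loss = 0.0     # last training loss, also logged
        self.step = 0       # global training step counter

    def get_action(self, state, evaluation):
        # returns (action_index, q_value); greedy when evaluation is True
        raise NotImplementedError

    def stock_experience(self, count, state, act_i, reward, state_dash, ep_end):
        # push one transition into the replay buffer
        raise NotImplementedError

    def train(self, count):
        # run one learning update once enough experience has been stored
        raise NotImplementedError

    def save_model(self, path):
        raise NotImplementedError

    def load_model(self, path):
        raise NotImplementedError
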
Example #4
# examine the state space
state = env_info.vector_observations[0]
print('States look like:', state)
state_size = len(state)
print('States have length:', state_size)

##################################
########### LOAD AGENT ###########
##################################

agent = Agent(state_size=state_size,
              action_size=action_size,
              dueling=dueling,
              seed=0)
agent.load_model(path_to_model)

env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
state = env_info.vector_observations[0]  # get the current state
score = 0  # initialize the score
while True:
    action = agent.act(state, 0)  # select an action
    env_info = env.step(action)[brain_name]  # send the action to the environment
    next_state = env_info.vector_observations[0]  # get the next state
    reward = env_info.rewards[0]  # get the reward
    done = env_info.local_done[0]  # see if episode has finished
    score += reward  # update the score
    state = next_state  # roll over the state to next time step
    if done:  # exit loop if episode finished
        break
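
The loop above ends silently once the episode finishes. A small, assumed follow-up that reports the result and releases the environment (env.close() is the standard way to shut down a UnityEnvironment; printing the score is just a convenience):

print('Score: {}'.format(score))  # total reward collected over the episode
env.close()                       # shut down the Unity environment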