# --- Experiment setup: one run with an epsilon-greedy agent ---
# NOTE(review): this cell was whitespace-mangled; reformatted to conventional
# indentation without changing any statement or literal.

max_n_ep = 2000  # number of episodes
# max_step - number of steps within an episode

Experiments = 1
Experiments_All_Rewards = np.zeros(shape=(max_n_ep))  # one reward slot per episode

for e in range(Experiments):
    # Q-network with train/test dropout support; state_dim / n_actions /
    # batch_size are assumed to be defined earlier in the file — TODO confirm.
    value_function = ValueFunctionDQN_TEST_TRAIN_DROPOUT(
        state_dim=state_dim, n_actions=n_actions, batch_size=batch_size)

    epsilon = 0.1  # decay rate for the temperature parameter
    # discount = 0.9

    agent = AgentEpsGreedy(
        n_actions=n_actions, value_function_model=value_function, eps=epsilon)
    memory = ReplayMemory(max_size=100000)

    # Per-episode bookkeeping: training loss, weight-matrix means, and reward.
    loss_per_ep = []
    w1_m_per_ep = []
    w2_m_per_ep = []
    w3_m_per_ep = []
    total_reward = []

    # At the start of episodes:
    #   initialise the Q network with dropout probability 0.9
    #   set current model index M with p=0.9
    dropout_probability = 0.9

    ep = 0
    avg_Rwd = -np.inf  # best-so-far average reward starts at -inf
    episode_end_msg = 'loss={:2.10f}, w1_m={:3.1f}, w2_m={:3.1f}, w3_m={:3.1f}, total reward={}'
# --- Experiment setup: 10 runs with a Boltzmann-exploration agent ---
# NOTE(review): this cell was whitespace-mangled; reformatted to conventional
# indentation without changing any statement or literal.
# max_step - number of steps within an episode

Experiments = 10
# NOTE(review): max_n_ep is not defined in this cell — presumably set by an
# earlier cell; verify before running standalone.
Experiments_All_Rewards = np.zeros(shape=(max_n_ep, Experiments))  # episodes x runs

for e in range(Experiments):
    # using a smaller network for the value function
    value_function = ValueFunctionDQN_TEST_TRAIN_DROPOUT(
        state_dim=state_dim, n_actions=n_actions, batch_size=batch_size)

    # this is the temperature parameter in Boltzmann Distribution
    tau_parameter = 1

    # The temperature is passed through the agent's `eps` slot — TODO confirm
    # AgentEpsGreedy interprets it as a Boltzmann temperature here.
    agent = AgentEpsGreedy(
        n_actions=n_actions, value_function_model=value_function, eps=tau_parameter)
    memory = ReplayMemory(max_size=100000)

    print('Experiment Number ', e)

    # Per-episode bookkeeping: training loss, weight-matrix means, and reward.
    loss_per_ep = []
    w1_m_per_ep = []
    w2_m_per_ep = []
    w3_m_per_ep = []
    total_reward = []

    ep = 0
    avg_Rwd = -np.inf  # best-so-far average reward starts at -inf
    episode_end_msg = 'loss={:2.10f}, w1_m={:3.1f}, w2_m={:3.1f}, w3_m={:3.1f}, total reward={}'