#max_step - number of steps within an episode

# Number of independent runs performed by the experiment loop.
Experiments = 10
# Zero-filled array with max_n_ep rows and one column per experiment
# (presumably one row per episode -- confirm where it gets filled in).
Experiments_All_Rewards = np.zeros((max_n_ep, Experiments))

# Settings forwarded unchanged as keyword arguments to BHN_Q_Network.
lbda, perdatapoint = 1, 0
prior = log_normal
coupling = 4

for e in range(Experiments):
    # Every pass builds a fresh network, agent and replay memory.
    print("Experiment", e)

    value_function = BHN_Q_Network(
        lbda=lbda, perdatapoint=perdatapoint, prior=prior, coupling=coupling
    )

    # NOTE(review): the original comment called this the "decay rate for
    # the temperature parameter"; it is not consumed anywhere in the
    # visible part of the loop -- confirm its intended use.
    discount = 0.9
    # Handed to the agent below as its `eps` argument.
    epsilon = 0.1

    agent = AgentEpsGreedy(
        n_actions=n_actions, value_function_model=value_function,
        state_dim=state_dim, batch_size=batch_size, eps=epsilon
    )
    memory = ReplayMemory(max_size=100000)

    # Two separate, initially empty lists (names suggest per-episode logs).
    loss_per_ep, w1_m_per_ep = [], []
# Example #2
0





for e in range(Experiments):
    # Every pass builds a fresh network, agent and replay memory.
    print("Experiment", e)

    # Extra configuration passed to the network constructor below.
    coupling_dim = 256
    weight_shapes = [(512, 256), (256, 2)]

    value_function = BHN_Q_Network(
        lbda=lbda,
        perdatapoint=perdatapoint,
        prior=prior,
        coupling=coupling,
        wn=WN,
        weight_shapes=weight_shapes,
        coupling_dim=coupling_dim,
    )

    # NOTE(review): the original comment called this the "decay rate for
    # the temperature parameter"; it is not consumed anywhere in the
    # visible part of the loop -- confirm its intended use.
    discount = 0.9
    # Handed to the agent below as its `eps` argument.
    epsilon = 0.1

    agent = AgentEpsGreedy(
        n_actions=n_actions,
        value_function_model=value_function,
        state_dim=state_dim,
        batch_size=batch_size,
        eps=epsilon,
    )
    memory = ReplayMemory(max_size=100000)

    # Four separate, initially empty lists (names suggest per-episode logs).
    loss_per_ep, w1_m_per_ep = [], []
    w2_m_per_ep, w3_m_per_ep = [], []