def get_reward_distribution():
    """Sample the 10 true action values q*(a) into the global ``mean_list``.

    Each mean is drawn independently from ``rand_norm(0.0, 1.0)``.
    """
    global mean_list

    # one q*(a) draw per arm; np.array of the comprehension matches np.zeros+fill
    mean_list = np.array([rand_norm(0.0, 1.0) for _ in range(10)])
Exemplo n.º 2
0
def env_step(this_action):
    """Pull the selected arm; return (reward, observation, terminal).

    this_action: NumPy array whose first element indexes the arm.
    The reward is drawn via ``rand_norm(rewards[arm], 1.0)`` — i.e. around
    the arm's true mean, not from a (0, 1) Gaussian as the original
    trailing comment claimed.  The bandit never terminates (False).
    """
    global this_reward_observation, rewards

    arm = this_action[0]
    drawn = rand_norm(rewards[arm], 1.0)
    this_reward_observation = (drawn, this_reward_observation[1], False)
    return this_reward_observation
Exemplo n.º 3
0
def env_step(this_action):
    """Return (reward, observation, terminal) for the chosen arm.

    this_action: NumPy array (or scalar) convertible to int.
    Reward is sampled with ``rand_norm(reward_distribution[arm], 1.0)``;
    the observation is carried over unchanged and the episode never ends.
    """
    global this_reward_observation

    arm = int(this_action)
    drawn = rand_norm(reward_distribution[arm], 1.0)

    this_reward_observation = (drawn, this_reward_observation[1], False)
    return this_reward_observation
Exemplo n.º 4
0
def env_step(this_action):
    """Sample a reward for the selected arm; return (reward, obs, terminal).

    this_action: NumPy array convertible to int.  The reward comes from
    ``rand_norm(distributions[arm], 1.0)`` — centred on the arm's mean,
    not a (0, 1) Gaussian as the original comment stated.
    """
    global this_reward_observation

    arm = int(this_action)
    drawn = rand_norm(distributions[arm], 1.0)
    this_reward_observation = (drawn, this_reward_observation[1], False)
    return this_reward_observation
Exemplo n.º 5
0
def env_init():
    """Initialise the bandit environment and the true action values q*(a).

    Bug fix: ``q`` was indexed (``q[i] = ...``) without ever being created
    in this function, raising a NameError unless some other module-level
    code had defined it.  It is now allocated here as a length-10 array,
    matching the sibling ``env_init`` that builds ``reward_distribution``.
    """
    global this_reward_observation, q

    local_observation = np.zeros(0)  # an empty NumPy array (stateless bandit)
    this_reward_observation = (0.0, local_observation, False)

    # set up q*(a): one N(0, 1)-style draw per arm via rand_norm
    q = np.zeros(10)
    for i in range(10):
        q[i] = rand_norm(0.0, 1.0)
def env_init():
    """Reset the environment: draw 10 arm means into ``reward_distribution``.

    Also installs the initial (0.0, empty-observation, False) tuple in the
    global ``this_reward_observation``.
    """
    global this_reward_observation, reward_distribution

    # true values q*(a): one rand_norm(0, 1) draw per arm
    reward_distribution = np.array([rand_norm(0.0, 1.0) for _ in range(10)])

    local_observation = np.zeros(0)  # an empty NumPy array
    this_reward_observation = (0.0, local_observation, False)
    def __init__(self, number_of_arms):
        """Create a k-armed testbed with rand_norm(0, 1)-drawn action values.

        number_of_arms: how many arms (actions) the testbed exposes;
        actions are numbered 1..number_of_arms.
        """
        super(KArmedTestbed, self).__init__()

        self.number_of_arms = number_of_arms
        # one true value per arm, drawn independently
        self.action_value = np.array(
            [rand_norm(0, 1.0) for _ in range(number_of_arms)])
        self.current_state = np.zeros(0)  # stateless bandit
        # 1-based action labels
        self.available_actions = np.arange(1, number_of_arms + 1)
Exemplo n.º 8
0
def createDistribution():
    """Populate the global ``distributions`` with 10 arm means.

    Each mean comes from ``rand_norm(0.0, 1.0)``.  ``numActions`` is kept
    in the global statement to preserve the original declaration, though
    this function never assigns it.
    """
    global distributions, numActions

    # ten independent draws, one per arm (replaces the manual while-counter)
    distributions = [rand_norm(0.0, 1.0) for _ in range(10)]
Exemplo n.º 9
0
def env_init():
    """Initialise the 10-armed bandit: sample arm means, track the best arm.

    Bug fix: the original used ``xrange``, which exists only in Python 2;
    ``range`` works on both.  ``opt_act`` ends up as the index of the
    largest entry in ``bandits`` (first index on ties, as before).
    """
    global this_reward_observation, bandits, opt_act

    local_observation = np.zeros(0)  # An empty NumPy array
    bandits = np.zeros(10)
    for i in range(10):  # was xrange (Python 2 only)
        bandits[i] = rand_norm(0, 1)
    # argmax returns the first maximal index, matching the original scan
    opt_act = int(np.argmax(bandits))

    this_reward_observation = (0.0, local_observation, False)
Exemplo n.º 10
0
def env_step(this_action):
    """Take one bandit step; return (reward, observation, terminal).

    this_action: NumPy array convertible to int.  The reward is sampled
    with ``rand_norm(mean_q_actions_array[action], 1)`` — mean equal to
    the action's true value, second argument 1.  The observation is kept
    as-is and no terminal state is ever reached.
    """
    global mean_q_actions_array
    global this_reward_observation

    arm = int(this_action)
    drawn = rand_norm(mean_q_actions_array[arm], 1)
    this_reward_observation = (drawn, this_reward_observation[1], False)
    return this_reward_observation
Exemplo n.º 11
0
def env_step(this_action):
    """Resample every arm's reward, then return the chosen arm's.

    this_action: NumPy array indexing into ``real_reward``.
    NOTE(review): all 10 arms are re-drawn each step even though only one
    reward is used; kept as-is so the rand_norm draw sequence (and hence
    any seeded results) is unchanged.
    """
    global this_reward_observation, real_reward, q

    # one rand_norm(q[i], 1) draw per arm
    for arm in range(10):
        real_reward[arm] = rand_norm(q[arm], 1)

    chosen = real_reward[this_action]

    this_reward_observation = (chosen, this_reward_observation[1], False)
    return this_reward_observation
Exemplo n.º 12
0
def env_init():
    """Set up a 10-action bandit: draw each action's true mean value.

    Fills the global ``mean_q_actions_array`` with ``rand_norm(0.0, 1.0)``
    draws, sets ``num_actions`` to 10, and installs the initial
    (0.0, empty-observation, False) tuple.
    """
    global this_reward_observation
    global mean_q_actions_array
    global num_actions

    num_actions = 10

    # true mean value for each action, one independent draw apiece
    mean_q_actions_array = np.zeros(num_actions)
    for action in range(num_actions):
        mean_q_actions_array[action] = rand_norm(0.0, 1.0)

    local_observation = np.zeros(0)  # an empty NumPy array
    this_reward_observation = (0.0, local_observation, False)
def env_get_reward(this_action):
    """Return a reward drawn around the selected arm's true mean.

    this_action: indexable whose first element identifies the arm; the
    reward is ``rand_norm(mean_list[arm], 1.0)``.
    """
    global mean_list

    arm = int(this_action[0])
    return rand_norm(mean_list[arm], 1.0)
Exemplo n.º 14
0
 def _arms(self, k):
     """Return a length-k NumPy array of true arm values from rand_norm(0, 1)."""
     values = np.zeros(k)
     for idx in range(k):
         values[idx] = rand_norm(0, 1)
     return values
Exemplo n.º 15
0
def q():
    """Fill the global ``qa`` with ``num_actions`` draws from rand_norm(0, 1)."""
    global qa
    # list comprehension replaces the manual append loop; same draw order
    qa = [rand_norm(0.0, 1.0) for _ in range(num_actions)]
    def do_action(self, action):
        """Drift every arm's true value, then sample the chosen arm's reward.

        Each arm's value moves by ``rand_norm(0.0, 0.01)`` per call
        (non-stationary bandit); the returned reward is
        ``rand_norm(action_value[action], 1.0)``.
        """
        # random-walk all arms first (actions are 1-based, hence a - 1)
        for arm in self.available_actions:
            self.action_value[arm - 1] += rand_norm(0.0, 0.01)

        return rand_norm(self.action_value[action], 1.0)
 def do_action(self, action):
     """Return a reward drawn around the chosen action's true value."""
     # state update goes here (no state change in this variant)
     mean = self.action_value[action]
     return rand_norm(mean, 1.0)
def env_init():
    """Reset the bandit: redraw each arm's true value into ``qtrue``.

    NOTE(review): ``qtrue`` and ``actions`` are assumed to already exist
    at module level — this function only mutates ``qtrue`` in place; it
    declares neither as global.  Confirm against the surrounding module.
    """
    global this_reward_observation

    local_observation = np.zeros(0)  # an empty NumPy array
    for arm in range(actions):
        qtrue[arm] = rand_norm(0.0, 1.0)
    this_reward_observation = (0.0, local_observation, False)
Exemplo n.º 19
0
def env_init():
    """Redraw every arm centre via rand_norm(0, 1) into ``arms_centres``.

    NOTE(review): ``arms_centres`` and ``arm_count`` must be defined
    elsewhere at module level; the array is filled in place here.
    """
    global arms_centres

    for arm in range(arm_count):
        arms_centres[arm] = rand_norm(0, 1)
Exemplo n.º 20
0
 def reward(self, action):
     """Sample a reward around the chosen arm's value: rand_norm(q*[a], 1)."""
     arm = int(action)
     return rand_norm(self._q_array[arm], 1)
Exemplo n.º 21
0
def env_init():
    """Reset the environment: redraw the 10 arm means into ``rewards``.

    NOTE(review): ``rewards`` (and ``mode``) must already exist at module
    level — the array is filled in place.  Also installs the initial
    (0.0, empty-observation, False) tuple.
    """
    global this_reward_observation, mode, rewards

    local_observation = np.zeros(0)  # an empty NumPy array
    for arm in range(10):
        rewards[arm] = rand_norm(0.0, 1.0)
    this_reward_observation = (0.0, local_observation, False)