Example #1
    def gen_encoders(self, N, contextD, context_scale):
        """Generate encoders for state population of learning agent.

        :param N: number of neurons in state population
        :param contextD: dimension of context vector representation
        :param context_scale: weight on context representation relative to
            state (1.0 = equal weighting)
        """

        if contextD > 0:
            contexts = MU.I(contextD)
        else:
            contexts = [[]]

        # neurons each sensitive to different combinations of stimuli
        encs = (list(MU.I(self.stateD)) +
                [o + s + c
                 for o in MU.I(self.num_orientations)
                 for s in MU.I(self.num_shapes)
                 for c in MU.I(self.num_colours)])

        return [HRLutils.normalize(
            HRLutils.normalize(random.choice(encs)) +
            [x * context_scale for x in random.choice(contexts)])
            for _ in range(N)]
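
For reference, `MU.I(d)` appears to return the rows of a d x d identity matrix (Nengo's matrix utilities), and `HRLutils.normalize` is a project helper that is not shown here. Below is a minimal stand-alone sketch of the same idea, with hypothetical `identity` and `normalize` stand-ins; it keeps only the plain state directions and skips the orientation/shape/colour combination encoders built above:

import math
import random


def identity(d):
    """Rows of a d x d identity matrix (stand-in for MU.I)."""
    return [[1.0 if i == j else 0.0 for j in range(d)] for i in range(d)]


def normalize(v):
    """Scale a vector to unit length (stand-in for HRLutils.normalize)."""
    norm = math.sqrt(sum(x ** 2 for x in v))
    return [x / norm for x in v] if norm > 0 else list(v)


def gen_encoders(N, stateD, contextD, context_scale):
    # each encoder: a random axis-aligned state direction, concatenated with
    # a scaled random axis-aligned context direction, then renormalized
    states = identity(stateD)
    contexts = identity(contextD) if contextD > 0 else [[]]
    return [normalize(normalize(random.choice(states)) +
                      [x * context_scale for x in random.choice(contexts)])
            for _ in range(N)]


print(gen_encoders(3, stateD=6, contextD=2, context_scale=0.5))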
Example #2
    def tick(self):
        # note: doing a slow decay rather than setting to 0, to put a bit
        # more power in the reward signal
        self.reward *= 0.97
        
        data = self.reader.readline()
        while data is not None:
            data = data.split()
            if data[0] == "state":
                self.state = [float(data[2]), float(data[3 + self.playernum])]
                self.state = [2 * self.state_radius * x / self.max_y - self.state_radius for x in self.state]
                self.optimal_move = 1 if float(data[2]) > float(data[4-self.playernum]) else -1
            elif data[0] == "reward" and int(data[1]) == self.playernum:
                self.reward = float(data[2]) * self.rewardscale
            elif data[0] == "stats":
                self.stats = [float(x) for x in data[1:]]
#            else:
#                print "unrecognized input (%s)" % data

            data = self.reader.readline()
            
        self.place_activations = self.calc_activations(self.state, self.place_dev)

#        print "state:", self.state
        # opponent gets a mix of optimal and random moves; this player sends
        # its currently selected action
        print >> self.p.stdin, "move %d %f" % (
            1 - self.playernum, random.choice([self.optimal_move, 1, -1]))
        print >> self.p.stdin, "move %d %f" % (
            self.playernum, self.mapping[self.action[0]])
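
This `tick` assumes `self.reader` is a non-blocking reader that returns `None` once every pending line from the game process has been consumed, and it writes moves back through the process's stdin using Python 2 `print >>` syntax. A rough stand-alone sketch of the same dispatch pattern, with the message fields guessed from the indexing above:

def parse_messages(lines, playernum):
    """Dispatch whitespace-separated messages in the format read above."""
    state, reward, stats = None, 0.0, None
    for line in lines:
        data = line.split()
        if not data:
            continue
        if data[0] == "state":
            state = [float(x) for x in data[1:]]
        elif data[0] == "reward" and int(data[1]) == playernum:
            reward = float(data[2])
        elif data[0] == "stats":
            stats = [float(x) for x in data[1:]]
    return state, reward, stats


print(parse_messages(["state 0.1 0.5 0.7 0.2", "reward 0 1.0"], playernum=0))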
Example #3
    def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
        """Initialize the environment variables.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param mapname: filename for map file
        :param contextD: dimension of vector representing context
        :param context_rewards: mapping from region labels to rewards for being
            in that region (each entry represents one context)
            :type context_rewards: dict {"regionlabel":rewardval,...}
        :param **kwargs: see PlaceCellEnvironment.__init__
        """

        PlaceCellEnvironment.__init__(self,
                                      actions,
                                      mapname,
                                      name="ContextEnvironment",
                                      **kwargs)

        self.rewards = context_rewards

        # generate vectors representing each context
        self.contexts = {}  # mapping from region label to context vector
        for i, r in enumerate(self.rewards):
            self.contexts[r] = list(MU.I(contextD)[i])

        self.context = self.contexts[random.choice(self.contexts.keys())]

        # randomly pick a new context every context_delay seconds
        self.context_delay = 60
        self.context_update = self.context_delay

        self.create_origin("placewcontext",
                           lambda: self.place_activations + self.context)
        self.create_origin("context", lambda: self.context)
Example #4
    def __init__(self, actions, mapname, contextD, context_rewards, **kwargs):
        """Initialize the environment variables.

        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param mapname: filename for map file
        :param contextD: dimension of vector representing context
        :param context_rewards: mapping from region labels to rewards for being in that
            region (each entry represents one context)
            :type context_rewards: dict {"regionlabel":rewardval,...}
        :param **kwargs: see PlaceCellEnvironment.__init__
        """

        PlaceCellEnvironment.__init__(self, actions, mapname, name="ContextEnvironment", **kwargs)

        self.rewards = context_rewards

        # generate vectors representing each context
        self.contexts = {} # mapping from region label to context vector
        for i, r in enumerate(self.rewards):
#            self.contexts[r] = list(RandomHypersphereVG().genVectors(1, contextD)[0])
            self.contexts[r] = list(MU.I(contextD)[i])

        self.context = self.contexts[random.choice(self.contexts.keys())]

        # randomly pick a new context every context_delay seconds
        self.context_delay = 60
        self.context_update = self.context_delay

        self.create_origin("placewcontext", lambda: self.place_activations + self.context)
        self.create_origin("context", lambda: self.context)
Example #5
    def tick(self):
        """Update state/reward each timestep."""

        # present stimuli
        if (self.t > self.presentationperiod[0] and
                self.t < self.presentationperiod[1] and
                self.state == [0 for _ in range(self.stateD)]):
            # pick a random stimulus at beginning of presentation period
            # and set that as the current state for the duration of
            # the presentation period
            self.state = random.choice(self.answers.keys())
            self.answer = self.answers[self.state]
            self.state = list(self.state)

        # provide feedback if in reward period
        if self.t > self.rewardperiod[0] and self.t < self.rewardperiod[1]:
            self.reward = (self.rewardval if self.action[0] == self.answer
                           else -self.rewardval)
        else:
            self.reward = 0

        # update score
        if ((self.t + self.rewardtime) % (self.presentationtime +
                                          self.rewardtime)) < 0.002:
            self.correct = self.correct[1:] + ([1.0] if self.action[0] ==
                                               self.answer else [0.0])

        # update presentation/reward period
        if (self.t % (self.presentationtime + self.rewardtime)) < 0.002:
            self.presentationperiod = [self.t, self.t + self.presentationtime]
            self.rewardperiod = [self.t + self.presentationtime, self.t +
                                 self.presentationtime + self.rewardtime]
            self.state = [0 for _ in range(self.stateD)]
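
Each trial is one presentation window followed immediately by a reward window, and the `< 0.002` checks assume the environment is ticked at least every 2 ms so each boundary is crossed exactly once. A small sketch of the resulting schedule, using timing values matching the BadreEnvironment constructor shown in Example #8:

presentationtime = 0.5  # stimulus presentation window (s)
rewardtime = 0.1        # feedback window (s)
trial = presentationtime + rewardtime

# presentation and reward windows for the first few trials
for n in range(3):
    start = n * trial
    presentationperiod = [start, start + presentationtime]
    rewardperiod = [start + presentationtime, start + trial]
    print(n, presentationperiod, rewardperiod)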
Example #6
    def gen_encoders(self, N, contextD, context_scale):
        """Generates encoders for state population in RL agent.

        State aspect of encoders comes from PlaceCellEnvironment. Context component is a
        unit vector with contextD dimensions and length context_scale.
        """

        s_encoders = PlaceCellEnvironment.gen_encoders(self, N)
#        c_encoders = RandomHypersphereVG(True,1.0,1.0).genVectors(N, d) #0.7
        c_encoders = [random.choice(MU.I(contextD)) for _ in range(N)]
        c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]
        encoders = [s + list(c) for s, c in zip(s_encoders, c_encoders)]
        encoders = [[x / math.sqrt(sum([y ** 2 for y in e])) for x in e] for e in encoders]
        return encoders
Example #7
    def gen_encoders(self, N, contextD, context_scale):
        """Generates encoders for state population in RL agent.

        State aspect of encoders comes from PlaceCellEnvironment. Context
        component is a unit vector with contextD dimensions and length
        context_scale.
        """

        s_encoders = PlaceCellEnvironment.gen_encoders(self, N)
        c_encoders = [random.choice(MU.I(contextD)) for _ in range(N)]
        c_encoders = [[x * context_scale for x in enc] for enc in c_encoders]
        encoders = [s + list(c) for s, c in zip(s_encoders, c_encoders)]
        encoders = [[x / math.sqrt(sum([y**2 for y in e])) for x in e]
                    for e in encoders]
        return encoders
Example #8
    def __init__(self, flat=False):
        """Set up task parameters.

        :param flat: if True, no hierarchical relationship between stimuli and
            reward; if False, stimuli-response rewards will be dependent on
            colour
        """

        self.rewardval = 1.5

        # actions correspond to three different button presses
        actions = [("left", [1, 0, 0]), ("middle", [0, 1, 0]),
                   ("right", [0, 0, 1])]

        # number of instances of each attribute (stimuli formed through
        # different combinations of attribute instances)
        self.num_orientations = 3
        self.num_shapes = 3
        self.num_colours = 2

        self.presentationtime = 0.5  # length of time to present each stimulus
        self.rewardtime = 0.1  # length of reward period

        # next presentation interval
        self.presentationperiod = [0, self.presentationtime]

        # next reward interval
        self.rewardperiod = [self.presentationtime,
                             self.presentationtime + self.rewardtime]

        self.answer = random.choice(actions)[0]  # correct response (initialized randomly)

        self.stateD = (self.num_orientations + self.num_shapes +
                       self.num_colours)

        self.correct = [0] * 20

        EnvironmentTemplate.__init__(self, "BadreEnvironment", self.stateD,
                                     actions)

        self.answers = self.gen_answers(flat)

        self.create_origin("optimal_move",
                           lambda: [a[1] for a in actions
                                    if a[0] == self.answer][0])
        self.create_origin("score",
                           lambda: [sum(self.correct) / len(self.correct)])
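
The `score` origin reports the fraction of correct responses over a sliding window of the last 20 trials; `self.correct` is updated in `tick` by dropping its oldest entry and appending the newest outcome. A minimal stand-alone sketch of that rolling score:

correct = [0.0] * 20  # outcomes of the last 20 trials (1.0 = correct)


def record(window, was_correct):
    """Drop the oldest outcome and append the newest one."""
    return window[1:] + ([1.0] if was_correct else [0.0])


for outcome in [True, True, False, True]:
    correct = record(correct, outcome)

print(sum(correct) / len(correct))  # -> 0.15 (3 correct in a 20-trial window)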
Example #9
    def update_context(self):
        """Pick a new random context once context_delay has elapsed."""
        if self.t > self.context_update:
            self.context = self.contexts[random.choice(self.contexts.keys())]
            self.context_update = self.t + self.context_delay