示例#1
0
    def random_location(self, avoid=()):
        """Pick a uniformly random location within the image bounds.

        :param avoid: region labels the returned point must not fall in;
            each candidate point is tested against them via ``self.is_in``.
            Default is an immutable empty tuple (a mutable ``[]`` default
            is a Python anti-pattern, even when only read).
        :returns: an ``(x, y)`` tuple with each coordinate drawn uniformly
            from ``[-imgsize/2, imgsize/2]``
        """

        def _candidate():
            # sample uniformly over the image, centred on the origin
            return (random.uniform(-self.imgsize[0] / 2.0, self.imgsize[0] / 2.0),
                    random.uniform(-self.imgsize[1] / 2.0, self.imgsize[1] / 2.0))

        # rejection sampling: redraw until the point lies outside every
        # avoided region (generator avoids building a throwaway list)
        pt = _candidate()
        while any(self.is_in(pt, s) for s in avoid):
            pt = _candidate()

        return pt
示例#2
0
    def random_location(self, avoid=()):
        """Pick a uniformly random location within the image bounds.

        :param avoid: region labels the returned point must not fall in;
            each candidate point is tested against them via ``self.is_in``.
            Default is an immutable empty tuple (a mutable ``[]`` default
            is a Python anti-pattern, even when only read).
        :returns: an ``(x, y)`` tuple with each coordinate drawn uniformly
            from ``[-imgsize/2, imgsize/2]``
        """

        def _candidate():
            # sample uniformly over the image, centred on the origin
            return (random.uniform(-self.imgsize[0] / 2.0, self.imgsize[0] / 2.0),
                    random.uniform(-self.imgsize[1] / 2.0, self.imgsize[1] / 2.0))

        # rejection sampling: redraw until the point lies outside every
        # avoided region (generator avoids building a throwaway list)
        pt = _candidate()
        while any(self.is_in(pt, s) for s in avoid):
            pt = _candidate()

        return pt
示例#3
0
    def random_location(self, avoid=()):
        """Pick a uniformly random location within the image bounds.

        :param avoid: region labels the returned point must not fall in;
            each candidate point is tested against them via ``self.is_in``.
            Default is an immutable empty tuple (a mutable ``[]`` default
            is a Python anti-pattern, even when only read).
        :returns: an ``(x, y)`` tuple with each coordinate drawn uniformly
            from ``[-imgsize/2, imgsize/2]``
        """

        def _candidate():
            # sample uniformly over the image, centred on the origin
            return (random.uniform(-self.imgsize[0] / 2.0, self.imgsize[0] / 2.0),
                    random.uniform(-self.imgsize[1] / 2.0, self.imgsize[1] / 2.0))

        # rejection sampling: redraw until the point lies outside every
        # avoided region (generator avoids building a throwaway list)
        pt = _candidate()
        while any(self.is_in(pt, s) for s in avoid):
            pt = _candidate()

        return pt
示例#4
0
 def reset(self):
     """Restart the timer at a random offset within the configured period."""
     low = self.period[0]
     high = self.period[1]
     self.time = random.uniform(low, high)
示例#5
0
    def __init__(self,
                 name,
                 N,
                 stateN,
                 actions,
                 learningrate,
                 Qradius=1.0,
                 init_decoders=None):
        """Build ActionValues network.

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, will be used to initialize the
            connection weights to whatever function is specified by decoders
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # don't use the unsupervised stuff at all

        self.tauPSC = 0.007

        modterms = []
        learnterms = []

        # relays
        output = net.make("output", 1, len(actions), mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0],
                               self.N * 4,
                               1,
                               node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination
            modterm = act_pop.addDecodedTermination(
                "error", [[0 if j != i else 1 for j in range(len(actions))]],
                0.005, True)
            # set modulatory transform so that it selects one dimension of
            # the error signal

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(),
                                  MU.transpose(init_decoders))
            else:
                # comprehension variables deliberately avoid reusing i/j:
                # under Python 2 / Jython list-comprehension variables leak
                # into the enclosing scope, so "for i in range(...)" here
                # would clobber the enumerate index used in the connect
                # transform below
                weights = [[
                    random.uniform(-1e-3, 1e-3) for _ in range(stateN)
                ] for _ in range(act_pop.getNeurons())]
            learningterm = act_pop.addHPESTermination("learning", weights,
                                                      0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop,
                      learningterm,
                      modterm,
                      rate=self.learningrate,
                      supervisionRatio=self.supervision)

            # connect each action back to output relay
            net.connect(act_pop.getOrigin("X"),
                        output,
                        transform=[[0] if j != i else [Qradius]
                                   for j in range(len(actions))],
                        pstc=0.001)
            # note, we learn all the Q values with radius 1, then just
            # multiply by the desired Q radius here

            modterms += [modterm]
            learnterms += [learningterm]

        # use EnsembleTerminations to group the individual action terminations
        # into one multi-dimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                               "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms),
                               "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
示例#6
0
    def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0,
                 init_decoders=None):
        """Build ActionValues network.

        :param name: name of Network
        :param N: base number of neurons
        :param stateN: number of neurons in state population
        :param actions: actions available to the system
            :type actions: list of tuples (action_name,action_vector)
        :param learningrate: learning rate for PES rule
        :param Qradius: expected radius of Q values
        :param init_decoders: if specified, will be used to initialize the
            connection weights to whatever function is specified by the
            decoders
        """

        self.name = name
        net = nef.Network(self, seed=HRLutils.SEED, quick=False)

        self.N = N
        self.learningrate = learningrate
        self.supervision = 1.0  # don't use the unsupervised stuff at all

        self.tauPSC = 0.007

        modterms = []
        learnterms = []

        # relays
        output = net.make("output", 1, len(actions), mode="direct")
        output.fixMode()

        for i, action in enumerate(actions):
            # create one population corresponding to each action
            act_pop = net.make("action_" + action[0], self.N * 4, 1,
                               node_factory=HRLutils.node_fac())
            act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

            # add error termination; the modulatory transform selects the
            # one dimension of the error signal matching this action
            modterm = act_pop.addDecodedTermination(
                "error", [[0 if j != i else 1 for j in range(len(actions))]],
                0.005, True)

            # create learning termination
            if init_decoders is not None:
                weights = MU.prod(act_pop.getEncoders(),
                                  MU.transpose(init_decoders))
            else:
                # comprehension variables deliberately avoid reusing i/j:
                # under Python 2 / Jython list-comprehension variables leak
                # into the enclosing scope, so "for i in range(...)" here
                # would clobber the enumerate index used in the connect
                # transform below
                weights = [[random.uniform(-1e-3, 1e-3) for _ in range(stateN)]
                           for _ in range(act_pop.getNeurons())]
            learningterm = act_pop.addHPESTermination("learning", weights,
                                                      0.005, False, None)

            # initialize the learning rule
            net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                      supervisionRatio=self.supervision)

            # connect each action back to output relay; we learn all the Q
            # values with radius 1, then just multiply by the desired Q
            # radius here
            net.connect(act_pop.getOrigin("X"), output,
                        transform=[[0] if j != i else [Qradius]
                                   for j in range(len(actions))],
                        pstc=0.001)

            modterms += [modterm]
            learnterms += [learningterm]

        # use EnsembleTerminations to group the individual action
        # terminations into one multi-dimensional termination
        self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                               "state")
        self.exposeTermination(EnsembleTermination(self, "error", modterms),
                               "error")

        self.exposeOrigin(output.getOrigin("X"), "X")
示例#7
0
 def reset(self):
     """Restart the timer at a random offset within the configured period."""
     low = self.period[0]
     high = self.period[1]
     self.time = random.uniform(low, high)