def random_location(self, avoid=None):
    """Pick a random location, avoiding regions with the specified labels."""

    # use None rather than a mutable default argument
    if avoid is None:
        avoid = []

    def random_pt():
        return (random.uniform(-self.imgsize[0] / 2.0, self.imgsize[0] / 2.0),
                random.uniform(-self.imgsize[1] / 2.0, self.imgsize[1] / 2.0))

    # keep generating random points until we find one that lies in none of
    # the regions to be avoided
    pt = random_pt()
    while any(self.is_in(pt, s) for s in avoid):
        pt = random_pt()
    return pt
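# A minimal usage sketch (assumed context): `self.imgsize` is the
# (width, height) of the environment image and `self.is_in(pt, label)` tests
# whether a point lies in the region with the given label; both are defined
# elsewhere in this class. For example, a spawn point that avoids walls and
# targets might be chosen as:
#
#     start = env.random_location(avoid=["wall", "target"])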
def reset(self):
    self.time = random.uniform(self.period[0], self.period[1])
def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0,
             init_decoders=None):
    """Build ActionValues network.

    :param name: name of Network
    :param N: base number of neurons
    :param stateN: number of neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name, action_vector)
    :param learningrate: learning rate for PES rule
    :param Qradius: expected radius of Q values
    :param init_decoders: if specified, will be used to initialize the
        connection weights to whatever function is specified by the decoders
    """

    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = N
    self.learningrate = learningrate
    self.supervision = 1.0  # don't use the unsupervised stuff at all
    self.tauPSC = 0.007

    modterms = []
    learnterms = []

    # relay that collects the Q value output of each action population
    output = net.make("output", 1, len(actions), mode="direct")
    output.fixMode()

    for i, action in enumerate(actions):
        # create one population corresponding to each action
        act_pop = net.make("action_" + action[0], self.N * 4, 1,
                           node_factory=HRLutils.node_fac())
        act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # add error termination; the modulatory transform selects the one
        # dimension of the error signal corresponding to this action
        modterm = act_pop.addDecodedTermination(
            "error", [[0 if j != i else 1 for j in range(len(actions))]],
            0.005, True)

        # create learning termination
        if init_decoders is not None:
            weights = MU.prod(act_pop.getEncoders(),
                              MU.transpose(init_decoders))
        else:
            # use throwaway loop variables so the enumerate index `i` is
            # not shadowed (comprehension variables leak scope in Python 2)
            weights = [[random.uniform(-1e-3, 1e-3) for _ in range(stateN)]
                       for _ in range(act_pop.getNeurons())]
        learningterm = act_pop.addHPESTermination("learning", weights,
                                                  0.005, False, None)

        # initialize the learning rule
        net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                  supervisionRatio=self.supervision)

        # connect each action back to the output relay; note, we learn all
        # the Q values with radius 1, then just multiply by the desired
        # Q radius here
        net.connect(act_pop.getOrigin("X"), output,
                    transform=[[0] if j != i else [Qradius]
                               for j in range(len(actions))],
                    pstc=0.001)

        modterms += [modterm]
        learnterms += [learningterm]

    # use EnsembleTerminations to group the individual action terminations
    # into one multi-dimensional termination
    self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                           "state")
    self.exposeTermination(EnsembleTermination(self, "error", modterms),
                           "error")

    self.exposeOrigin(output.getOrigin("X"), "X")
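# A hypothetical construction sketch: the action names, neuron counts, and
# learning rate below are illustrative assumptions, not values taken from
# this repository. An ActionValues network for three actions might be
# built as:
#
#     actions = [("up", [1, 0, 0]), ("down", [0, 1, 0]), ("stay", [0, 0, 1])]
#     av = ActionValues("actionvalues", N=50, stateN=500, actions=actions,
#                       learningrate=1e-9, Qradius=2.0)
#
# The exposed "state" termination then receives input weighted by the
# stateN-wide connection weights (one row per action neuron), the "error"
# termination receives a len(actions)-dimensional error signal routed one
# dimension per action, and the "X" origin outputs the learned Q values.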