def run(self, start, end):
    nef.SimpleNode.run(self, start, end)

    # Get total values from input terminations
    total_input = util_funcs.zeros(1, self.dimension)
    for term_str in self.input_terms.keys():
        term_obj = self.getTermination(term_str)
        term_out = term_obj._filtered_values
        term_mat = self.input_terms[term_str]
        if term_mat is None:
            term_val = term_out
        else:
            term_val = MU.prod(term_mat, term_out)
        total_input = [total_input[n] + term_val[n]
                       for n in range(self.dimension)]

    # Get total inhibitory input
    total_inhib = 0
    for term_str in self.inhib_terms.keys():
        term_obj = self.getTermination(term_str)
        term_out = term_obj._filtered_values
        term_mat = self.inhib_terms[term_str]
        term_val = MU.prod(term_mat, term_out)
        total_inhib = total_inhib + term_val

    # Calculate return value
    input_mag = util_funcs.norm(total_input)
    input_sign = cmp(input_mag, 0)
    inhibd_mag = max(abs(input_mag) + (total_inhib * self.radius), 0) * \
        input_sign
    if input_mag != 0:
        self.return_val = [total_input[n] * inhibd_mag / input_mag
                           for n in range(self.dimension)]
    else:
        self.return_val = util_funcs.zeros(1, self.dimension)
    return
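# A minimal pure-Python sketch of the inhibitory gating arithmetic above,
# assuming util_funcs.norm is the Euclidean norm and radius is 1.0 (both
# names here are stand-ins for the real helpers). Inhibition shrinks the
# magnitude of the input vector without changing its direction; once the
# (negative) inhibition exceeds the input magnitude, the output clamps at
# the zero vector.
import math

def gate(total_input, total_inhib, radius=1.0):
    input_mag = math.sqrt(sum(x * x for x in total_input))
    inhibd_mag = max(input_mag + total_inhib * radius, 0)
    if input_mag == 0:
        return [0.0] * len(total_input)
    return [x * inhibd_mag / input_mag for x in total_input]

print(gate([3.0, 4.0], -2.5))  # magnitude 5 reduced to 2.5 -> [1.5, 2.0]
print(gate([3.0, 4.0], -7.0))  # fully inhibited -> [0.0, 0.0]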
def __init__(self, spinn, origin, termination, transform=None):
    scale = [nn.scale for nn in termination.node.nodes]
    if transform is None:
        transform = termination.transform

    if origin.node.neurons > spinn.max_fan_in:
        w = optsparse.compute_sparse_weights(origin, termination.node,
                                             transform, spinn.max_fan_in)
    else:
        w = MU.prod(termination.node.encoders,
                    MU.prod(transform, MU.transpose(origin.decoders)))

    w = MU.prod(w, 1.0 / termination.node.radii[0])
    for i in range(len(w)):
        for j in range(len(w[i])):
            w[i][j] *= scale[i] / termination.tau
    w = MU.transpose(w)

    self.weights = w
    self.tau = int(round(termination.tau * 1000))
    if self.tau not in spinn.populations[termination.node].taus:
        spinn.populations[termination.node].taus.append(self.tau)
    self.pre = spinn.populations[origin.node].name
    self.post = spinn.populations[termination.node].name
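# A small self-contained sketch of the dense weight computation above,
# assuming MU.prod behaves like matrix multiplication. The full connection
# weight matrix is W = E . (T . D^t) / radius, where E is the post
# population's encoders (N_post x D_post), T the termination transform
# (D_post x D_pre), and D the pre population's decoders (N_pre x D_pre).
# Each row i is then scaled by the post neuron's gain over the synaptic
# time constant, as in the loop above.
def matmul(a, b):
    # plain-Python stand-in for MU.prod on two matrices
    return [[sum(a[i][k] * b[k][j] for k in range(len(b)))
             for j in range(len(b[0]))] for i in range(len(a))]

def transpose(m):
    return [list(row) for row in zip(*m)]

encoders = [[1.0], [-1.0]]           # 2 post neurons, 1D
decoders = [[0.5], [0.25], [0.1]]    # 3 pre neurons, 1D
transform = [[2.0]]                  # 1x1 transform

w = matmul(encoders, matmul(transform, transpose(decoders)))
print(w)  # 2x3 weight matrix, one weight per (post, pre) pair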
def addPlasticTermination(self, name, matrix, tauPSC, decoder,
                          weight_func=None):
    """Create a new termination.

    A new termination is created on each of the ensembles, which are then
    grouped together. If decoders are not known at the time the
    termination is created, then pass in an array of zeros of the
    appropriate size (i.e. however many neurons will be in the population
    projecting to the termination, by number of dimensions).
    """
    terminations = []
    d = 0
    dd = self._nodes[0].dimension
    for n in self._nodes:
        encoder = n.encoders
        w = MU.prod(encoder,
                    [MU.prod(matrix, MU.transpose(decoder))[d + i]
                     for i in range(dd)])
        if weight_func is not None:
            w = weight_func(w)
        t = n.addPESTermination(name, w, tauPSC, False)
        terminations.append(t)
        d += dd
    termination = EnsembleTermination(self, name, terminations)
    self.exposeTermination(termination, name)
    return self.getTermination(name)
def compute_weight_matrix(self, proj):
    orig = proj.origin
    term = proj.termination
    post = term.node
    transform = term.transform

    while hasattr(orig, 'getWrappedOrigin'):
        orig = orig.getWrappedOrigin()
    decoder = orig.getDecoders()
    encoder = term.node.getEncoders()

    # scale by radius
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    encoder = MU.prod(encoder, self.weight_scale)

    # scale by gain
    for i, n in enumerate(post.nodes):
        for j in range(len(encoder[i])):
            encoder[i][j] *= n.scale
    # encoder = MU.prodElementwise(encoder, [n.scale for n in post.nodes])

    w = MU.prod(encoder, MU.prod(transform, MU.transpose(decoder)))
    return w
def compute_sparse_weights(origin, post, transform, fan_in, noise=0.1,
                           num_samples=100):
    encoder = post.encoders
    radius = post.radii[0]

    if hasattr(transform, 'tolist'):
        transform = transform.tolist()

    approx = origin.node.getDecodingApproximator('AXON')

    # create X matrix
    X = approx.evalPoints
    X = MU.transpose([f.multiMap(X) for f in origin.functions])

    # create A matrix
    A = approx.values

    S = fan_in
    N_A = len(A)
    samples = len(A[0])
    N_B = len(encoder)
    w_sparse = np.zeros((N_B, N_A), 'f')
    noise_sd = MU.max(A) * noise

    # solve for a sparse decoder on each of num_samples random subsets of
    # S presynaptic neurons, remembering which neuron indices each decoder
    # was solved for
    decoder_list = [None for _ in range(num_samples)]
    index_list = [None for _ in range(num_samples)]
    for i in range(num_samples):
        indices = random.sample(range(N_A), S)
        activity = [A[j] for j in indices]
        n = [[random.gauss(0, noise_sd) for _ in range(samples)]
             for j in range(S)]
        activity = MU.sum(activity, n)
        activityT = MU.transpose(activity)
        gamma = MU.prod(activity, activityT)
        upsilon = MU.prod(activity, X)
        gamma_inv = pinv(gamma, noise_sd * noise_sd)
        decoder_list[i] = MU.prod([[x for x in row] for row in gamma_inv],
                                  upsilon)
        index_list[i] = indices

    for i in range(N_B):
        # pick a random decoder together with the neuron indices it was
        # solved for (choosing them separately would mismatch decoders
        # and indices)
        sample = random.randrange(num_samples)
        decoder = decoder_list[sample]
        indices = index_list[sample]
        ww = MU.prod(decoder, MU.prod(MU.transpose(transform), encoder[i]))
        for j, k in enumerate(indices):
            w_sparse[i, k] = float(ww[j]) / radius

    return list(w_sparse)
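# A compact numpy sketch of the regularized least-squares solve at the
# heart of compute_sparse_weights (assumption: CPython with numpy; the
# Jython code above uses the MU helpers and a tolerance-based pinv rather
# than the explicit ridge term shown here). Decoders d for a subset of S
# neurons minimize ||A^t d - x||^2, solved from the Gram matrix
# gamma = A A^t and the target projection upsilon = A x.
import numpy as np

rng = np.random.RandomState(0)
S, samples = 5, 50                  # sampled neurons, evaluation points
A = rng.rand(S, samples)            # activities of the sampled neurons
x = np.linspace(-1, 1, samples)     # target function values
sigma = 0.1 * A.max()               # noise level, like noise_sd above

gamma = A.dot(A.T) + sigma ** 2 * np.eye(S)  # regularized Gram matrix
upsilon = A.dot(x)
d = np.linalg.solve(gamma, upsilon)          # decoders for this subset
err = np.sqrt(np.mean((A.T.dot(d) - x) ** 2))
print(err)  # reconstruction RMSE of the target from S noisy neurons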
def calc_weights(self, encoder, decoder):
    self.N1 = len(decoder[0])
    self.D = len(decoder)
    self.N2 = len(encoder)
    self.getTermination('input').setDimensions(self.N1)
    self.getOrigin('output').setDimensions(self.N2)

    self.tables = []
    self.histograms = []
    for dim in range(self.D):
        cdfs = []
        self.tables.append(make_output_table([e[dim] for e in encoder]))
        for i in range(self.N1):
            d = decoder[dim][i] / spike_strength
            if d < 0:
                decoder_sign = -1
                d = -d
            else:
                decoder_sign = 1
            histogram = compute_histogram(d, [e[dim] for e in encoder])
            cdf = compute_cdf(histogram)
            cdfs.append((decoder_sign, cdf))
        self.histograms.append(cdfs)

    return numeric.array(MU.prod(encoder, decoder))
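# A minimal sketch of the histogram-to-CDF sampling idea assumed above
# (compute_histogram, compute_cdf and make_output_table are defined
# elsewhere, so this is illustrative only): turn a histogram of outcomes
# into a cumulative distribution, then draw samples by inverse-transform
# sampling with a uniform random number.
import random

def cdf_from_histogram(histogram):
    total = float(sum(histogram))
    cdf, acc = [], 0.0
    for h in histogram:
        acc += h / total
        cdf.append(acc)
    return cdf

def sample(cdf):
    r = random.random()
    for i, c in enumerate(cdf):
        if r <= c:
            return i
    return len(cdf) - 1

cdf = cdf_from_histogram([1, 3, 6])  # outcome 2 six times as likely as 0
print([sample(cdf) for _ in range(10)])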
def weights(self, obj, termination, include_gain=False):
    v = []
    for n in obj.nodes:
        w = n.getTermination(termination).weights
        if include_gain:
            w = MU.prod(w, n.scale)
        v.extend(w)
    return v
def termination_Cycle(self, x):
    x = x[0]
    if self.cyc_opt:
        x = 1 - x
    if x < 0.025:
        if self.reset_val < 0.5:
            input_total = zeros(1, self.dimension)
            for term_name in self.input_terms:
                termination = self.getTermination(term_name)
                term_matrix = self.input_mats[term_name]
                term_output = termination.getOutput()
                if isinstance(term_matrix, (int, float, long)):
                    input_total = [input_total[n] +
                                   term_matrix * term_output[n]
                                   for n in range(self.dimension)]
                else:
                    # term_value = numeric.dot(
                    #     numeric.array(term_output, typecode='f'),
                    #     self.input_mats[term_name])
                    term_value = MU.prod(self.input_mats[term_name],
                                         term_output)
                    input_total = [input_total[n] + term_value[n]
                                   for n in range(self.dimension)]
            self.stored_val = deepcopy(input_total)
def run_flat_delivery(args, seed=None):
    """Runs the model on the delivery task with only one hierarchical
    level."""

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_flat_delivery")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200
    contextD = 2
    context_scale = 1.0
    max_state_input = 2
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall",
                  -1: "floor",
                  -256: "a",
                  -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))
    net.connect(env.getOrigin("placewcontext"),
                nav_agent.getTermination("state_input"))

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env,
        name="NavTermNode", contextD=2)
    net.add(nav_term_node)
    net.connect(env.getOrigin("context"),
                nav_term_node.getTermination("context"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    reward_relay = net.make("reward_relay", 1, 1, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay)
    net.connect(nav_term_node.getOrigin("pseudoreward"), reward_relay)
    net.connect(reward_relay, nav_agent.getTermination("reward"))

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (nav_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile(
                                 "dataoutput_%s.txt" % seed))
    net.add(data)
    q_net = nav_agent.getNode("QNetwork")
    data.record_avg(env.getOrigin("reward"))
    data.record_avg(q_net.getNode("actionvals").getOrigin("X"))
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(nav_agent.getNode("ErrorNetwork").getOrigin("error"))

    # net.add_to_nengo()
    # net.run(10000)
    net.view()
def run_contextenvironment(args, seed=None):
    """Runs the model on the context task.

    :param args: kwargs for the agent
    :param seed: random seed
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("runContextEnvironment")

    if "load_weights" in args and args["load_weights"] is not None:
        args["load_weights"] += "_%s" % seed

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # scale of context representation
    max_state_input = 2  # max length of input vector for state population

    # actions (label and vector) available to the system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    # context labels and rewards for achieving those context goals
    rewards = {"a": 1.5, "b": 1.5}

    env = contextenvironment.ContextEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"), contextD, rewards,
        colormap={-16777216: "wall",
                  -1: "floor",
                  -256: "a",
                  -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # termination node for agent (just goes off on some regular interval)
    term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): 0.0}, env)
    net.add(term_node)

    # generate encoders and divide by max_state_input (so that all inputs
    # will end up being radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # load eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % seed)) as f:
        print "loading contextbmp_evalpoints_%s.txt" % seed
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                actions, state_encoders=enc,
                                state_evals=evals, state_threshold=0.8,
                                **args)
    net.add(agent)

    print "agent neurons:", agent.countNeurons()

    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    t = HRLutils.WeightSaveThread(agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (agent.name, seed)),
                                  weight_save)
    t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile(
                                 "dataoutput_%s.txt" % seed))
    net.add(data)
    q_net = agent.getNode("QNetwork")
    data.record(env.getOrigin("reward"))
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(env.getOrigin("state"))

    net.connect(env.getOrigin("placewcontext"),
                agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"), agent.getTermination("reward"))
    net.connect(term_node.getOrigin("reset"), agent.getTermination("reset"))
    net.connect(term_node.getOrigin("learn"), agent.getTermination("learn"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_state"))
    net.connect(term_node.getOrigin("reset"),
                agent.getTermination("save_action"))
    net.connect(agent.getOrigin("action_output"),
                env.getTermination("action"))

    # net.add_to_nengo()
    # net.run(2000)
    net.view()

    t.stop()
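# A small sketch of the encoder scaling used above, under the assumption
# that gen_encoders returns unit-length encoder rows: dividing every
# encoder by max_state_input means an input vector of length
# max_state_input produces the same dot products that a unit-length input
# would against the original encoders, i.e. the state population
# effectively represents the input within radius 1.
def scale_rows(m, s):
    return [[x * s for x in row] for row in m]

enc = [[0.6, 0.8], [1.0, 0.0]]   # unit-length encoders (hypothetical)
max_state_input = 2.0
enc = scale_rows(enc, 1.0 / max_state_input)
state = [1.2, 1.6]               # input of length max_state_input
print([sum(e * x for e, x in zip(row, state)) for row in enc])  # in [-1, 1]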
def run_deliveryenvironment(navargs, ctrlargs, tag=None, seed=None):
    """Runs the model on the delivery task.

    :param navargs: kwargs for the nav_agent (see SMDPAgent.__init__)
    :param ctrlargs: kwargs for the ctrl_agent (see SMDPAgent.__init__)
    :param tag: string appended to datafiles associated with this run
    :param seed: random seed used for this run
    """

    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    if tag is None:
        tag = str(seed)

    net = nef.Network("runDeliveryEnvironment", seed=seed)

    stateN = 1200  # number of neurons to use in state population
    contextD = 2  # dimension of context vector
    context_scale = 1.0  # relative scale of context vector vs state vector
    max_state_input = 2  # maximum length of input vector to state population

    # labels and vectors corresponding to basic actions available to the
    # system
    actions = [("up", [0, 1]), ("right", [1, 0]),
               ("down", [0, -1]), ("left", [-1, 0])]

    if "load_weights" in navargs and navargs["load_weights"] is not None:
        navargs["load_weights"] += "_%s" % tag
    if "load_weights" in ctrlargs and ctrlargs["load_weights"] is not None:
        ctrlargs["load_weights"] += "_%s" % tag

    # ##ENVIRONMENT

    env = deliveryenvironment.DeliveryEnvironment(
        actions, HRLutils.datafile("contextmap.bmp"),
        colormap={-16777216: "wall",
                  -1: "floor",
                  -256: "a",
                  -2088896: "b"},
        imgsize=(5, 5), dx=0.001, placedev=0.5)
    net.add(env)

    print "generated", len(env.placecells), "placecells"

    # ##NAV AGENT

    # generate encoders and divide them by max_state_input (so that inputs
    # will be scaled down to radius 1)
    enc = env.gen_encoders(stateN, contextD, context_scale)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # read in eval points from file
    with open(HRLutils.datafile("contextbmp_evalpoints_%s.txt" % tag)) as f:
        evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]

    nav_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                    actions, name="NavAgent",
                                    state_encoders=enc, state_evals=evals,
                                    state_threshold=0.8, **navargs)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    # output of nav_agent is what goes to the environment
    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # termination node for nav_agent (just a timer that goes off regularly)
    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.9)): None}, env, contextD=2,
        name="NavTermNode")
    net.add(nav_term_node)
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    # ##CTRL AGENT

    # actions corresponding to "go to A" or "go to B"
    actions = [("a", [0, 1]), ("b", [1, 0])]

    ctrl_agent = smdpagent.SMDPAgent(stateN, len(env.placecells) + contextD,
                                     actions, name="CtrlAgent",
                                     state_encoders=enc, state_evals=evals,
                                     state_threshold=0.8, **ctrlargs)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    # ctrl_agent gets environmental state and reward
    net.connect(env.getOrigin("placewcontext"),
                ctrl_agent.getTermination("state_input"))
    net.connect(env.getOrigin("reward"),
                ctrl_agent.getTermination("reward"))

    # termination node for ctrl_agent (terminates whenever the agent is in
    # the state targeted by the ctrl_agent)
    # also has a long timer so that ctrl_agent doesn't get permanently
    # stuck in one action
    ctrl_term_node = terminationnode.TerminationNode(
        {"a": [0, 1], "b": [1, 0], terminationnode.Timer((30, 30)): None},
        env, contextD=2, name="CtrlTermNode", rewardval=1.5)
    net.add(ctrl_term_node)

    # reward for nav_agent is the pseudoreward from ctrl_agent termination
    net.connect(ctrl_term_node.getOrigin("pseudoreward"),
                nav_agent.getTermination("reward"))

    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # connect ctrl_agent action to termination context
    # this is used so that ctrl_term_node knows what the current goal is
    # (to determine termination and pseudoreward)
    net.connect(ctrl_agent.getOrigin("action_output"),
                ctrl_term_node.getTermination("context"))

    # state input for nav_agent is the environmental state + the output of
    # ctrl_agent
    ctrl_output_relay = net.make("ctrl_output_relay", 1,
                                 len(env.placecells) + contextD,
                                 mode="direct")
    ctrl_output_relay.fixMode()
    trans = (list(MU.I(len(env.placecells))) +
             [[0 for _ in range(len(env.placecells))]
              for _ in range(contextD)])
    net.connect(env.getOrigin("place"), ctrl_output_relay, transform=trans)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=([[0 for _ in range(contextD)]
                            for _ in range(len(env.placecells))] +
                           list(MU.I(contextD))))
    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # periodically save the weights
    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    threads = [
        HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (nav_agent.name, tag)),
                                  weight_save),
        HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                                  os.path.join("weights", "%s_%s" %
                                               (ctrl_agent.name, tag)),
                                  weight_save)]
    for t in threads:
        t.start()

    # data collection node
    data = datanode.DataNode(period=5,
                             filename=HRLutils.datafile(
                                 "dataoutput_%s.txt" % tag))
    net.add(data)
    data.record(env.getOrigin("reward"))
    q_net = ctrl_agent.getNode("QNetwork")
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=max)
    data.record(q_net.getNode("actionvals").getOrigin("X"), func=min)
    data.record_sparsity(q_net.getNode("state_pop").getOrigin("AXON"))
    data.record_avg(q_net.getNode("valdiff").getOrigin("X"))
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"))

    # net.add_to_nengo()
    # net.run(10000)
    net.view()

    for t in threads:
        t.stop()
def run_badreenvironment(nav_args, ctrl_args, seed=None, flat=False):
    if seed is not None:
        HRLutils.set_seed(seed)
    seed = HRLutils.SEED

    net = nef.Network("run_badreenvironment")

    env = badreenvironment.BadreEnvironment(flat=flat)
    net.add(env)

    # ##NAV AGENT

    stateN = 500
    max_state_input = 2
    enc = env.gen_encoders(stateN, 0, 1.0)
    enc = MU.prod(enc, 1.0 / max_state_input)

    # with open(HRLutils.datafile("badre_evalpoints.txt")) as f:
    #     evals = [[float(x) for x in l.split(" ")] for l in f.readlines()]
    orientations = MU.I(env.num_orientations)
    shapes = MU.I(env.num_shapes)
    colours = MU.I(env.num_colours)
    evals = (list(MU.I(env.stateD)) +
             [o + s + c for o in orientations for s in shapes
              for c in colours])

    nav_agent = smdpagent.SMDPAgent(stateN, env.stateD, env.actions,
                                    name="NavAgent", load_weights=None,
                                    state_encoders=enc, state_evals=evals,
                                    discount=0.4, **nav_args)
    net.add(nav_agent)

    print "agent neurons:", nav_agent.countNeurons()

    nav_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env,
        name="NavTermNode", state_delay=0.1)
    net.add(nav_term_node)
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("reset"))
    net.connect(nav_term_node.getOrigin("learn"),
                nav_agent.getTermination("learn"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_state"))
    net.connect(nav_term_node.getOrigin("reset"),
                nav_agent.getTermination("save_action"))

    net.connect(nav_agent.getOrigin("action_output"),
                env.getTermination("action"))

    # ##CTRL AGENT

    enc = env.gen_encoders(stateN, 0, 0)
    enc = MU.prod(enc, 1.0 / max_state_input)
    actions = [("shape", [0, 1]), ("orientation", [1, 0]), ("null", [0, 0])]
    ctrl_agent = smdpagent.SMDPAgent(stateN, env.stateD, actions,
                                     name="CtrlAgent", load_weights=None,
                                     state_encoders=enc, state_evals=evals,
                                     discount=0.4, **ctrl_args)
    net.add(ctrl_agent)

    print "agent neurons:", ctrl_agent.countNeurons()

    net.connect(env.getOrigin("state"),
                ctrl_agent.getTermination("state_input"))

    ctrl_term_node = terminationnode.TerminationNode(
        {terminationnode.Timer((0.6, 0.6)): None}, env,
        name="CtrlTermNode", state_delay=0.1)
    net.add(ctrl_term_node)
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("reset"))
    net.connect(ctrl_term_node.getOrigin("learn"),
                ctrl_agent.getTermination("learn"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_state"))
    net.connect(ctrl_term_node.getOrigin("reset"),
                ctrl_agent.getTermination("save_action"))

    # reward for nav/ctrl
    reward_relay = net.make("reward_relay", 1, 2, mode="direct")
    reward_relay.fixMode()
    net.connect(env.getOrigin("reward"), reward_relay, transform=[[1], [0]])
    net.connect(ctrl_agent.getOrigin("action_output"), reward_relay,
                transform=[[0, 0], [1, 1]])

    # nav reward is just the environment reward
    net.connect(reward_relay, nav_agent.getTermination("reward"),
                func=lambda x: x[0], origin_name="nav_reward")

    # ctrl gets a slight bonus if it selects a rule (as opposed to null),
    # to encourage it not to just pick null all the time
    net.connect(reward_relay, ctrl_agent.getTermination("reward"),
                func=lambda x: x[0] + 0.25 * abs(x[0])
                if x[1] > 0.5 else x[0],
                origin_name="ctrl_reward")

    # state for nav_agent controlled by ctrl_agent
    # ctrl_output_relay = net.make("ctrl_output_relay", 1, env.stateD + 2,
    #                              mode="direct")
    # ctrl_output_relay.fixMode()
    ctrl_output_relay = net.make_array("ctrl_output_relay", 50, env.stateD,
                                       radius=2,
                                       mode=HRLutils.SIMULATION_MODE)
    ctrl_output_relay.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

    inhib_matrix = ([[0, -5]] * 50 * env.num_orientations +
                    [[-5, 0]] * 50 * env.num_shapes +
                    [[-5, -5]] * 50 * env.num_colours)

    # ctrl output inhibits all the non-selected aspects of the state
    net.connect(env.getOrigin("state"), ctrl_output_relay)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                # transform=zip([0] * env.num_orientations +
                #               [-1] * (env.num_shapes + env.num_colours),
                #               [-1] * env.num_orientations +
                #               [0] * env.num_shapes +
                #               [-1] * env.num_colours))
                transform=inhib_matrix)

    # also give a boost to the selected aspects (so that neurons are
    # roughly equally activated); adding 2/3 to each element (base vector
    # has length 3, inhibited vector has length 1, so add 2/3 * 3 --> 3)
    net.connect(ctrl_agent.getOrigin("action_output"), ctrl_output_relay,
                transform=zip([0.66] * env.num_orientations +
                              [0] * (env.num_shapes + env.num_colours),
                              [0] * env.num_orientations +
                              [0.66] * env.num_shapes +
                              [2] * env.num_colours))

    net.connect(ctrl_output_relay, nav_agent.getTermination("state_input"))

    # save weights
    # period to save weights (realtime, not simulation time)
    weight_save = 600.0
    HRLutils.WeightSaveThread(nav_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (nav_agent.name, seed)),
                              weight_save).start()
    HRLutils.WeightSaveThread(ctrl_agent.getNode("QNetwork").saveParams,
                              os.path.join("weights", "%s_%s" %
                                           (ctrl_agent.name, seed)),
                              weight_save).start()

    # data collection node
    data = datanode.DataNode(period=5, show_plots=None,
                             filename=HRLutils.datafile(
                                 "dataoutput_%s.txt" % seed))
    data_filter = 1e-5
    net.add(data)
    data.record_avg(env.getOrigin("reward"), filter=data_filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("actionvals")
                    .getOrigin("X"), filter=data_filter)
    data.record_sparsity(ctrl_agent.getNode("QNetwork")
                         .getNode("state_pop").getOrigin("AXON"),
                         filter=data_filter)
    data.record_sparsity(nav_agent.getNode("QNetwork")
                         .getNode("state_pop").getOrigin("AXON"),
                         filter=data_filter)
    data.record_avg(ctrl_agent.getNode("QNetwork").getNode("valdiff")
                    .getOrigin("X"), filter=data_filter)
    data.record_avg(ctrl_agent.getNode("ErrorNetwork").getOrigin("error"),
                    filter=data_filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork")
                    .getNode("weight_actions").getNode("0")
                    .getOrigin("AXON"), filter=data_filter)
    data.record_avg(ctrl_agent.getNode("BGNetwork")
                    .getNode("weight_actions").getNode("1")
                    .getOrigin("AXON"), filter=data_filter)

    net.add_to_nengo()
    # net.view()
    net.run(2000)
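# A toy check of the inhibition transform built above, assuming one
# orientation, one shape and one colour dimension with one neuron each
# (the real network uses 50 neurons per dimension). The ctrl action is 2D
# ([0, 1] = shape, [1, 0] = orientation), and each row of the transform
# says how strongly that action inhibits one neuron: orientation neurons
# are inhibited by the shape action, shape neurons by the orientation
# action, and colour neurons by both.
num_orientations = num_shapes = num_colours = 1
neurons = 1  # per dimension; 50 in the model above

inhib_matrix = ([[0, -5]] * neurons * num_orientations +
                [[-5, 0]] * neurons * num_shapes +
                [[-5, -5]] * neurons * num_colours)

action = [0, 1]  # "shape" selected
print([sum(w * a for w, a in zip(row, action)) for row in inhib_matrix])
# -> [-5, 0, -5]: orientation and colour suppressed, shape untouched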
def termination_action(self, a, pstc=0.01):
    # set the selected action to the one with highest similarity to the
    # available actions
    self.action = max(self.actions, key=lambda x: MU.prod(a, x[1]))
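# A plain-Python sketch of the selection rule above, assuming MU.prod of
# two vectors is a dot product: the incoming vector a is matched against
# each action's vector and the best-aligned action wins.
def dot(u, v):
    return sum(x * y for x, y in zip(u, v))

actions = [("up", [0, 1]), ("right", [1, 0]),
           ("down", [0, -1]), ("left", [-1, 0])]
a = [0.9, -0.2]
print(max(actions, key=lambda act: dot(a, act[1]))[0])  # -> "right"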
def make(net, preName='pre', postName='post', rate=5e-4):
    # get pre and post ensembles from their names
    pre = net.network.getNode(preName)
    post = net.network.getNode(postName)

    # build a transform that maps pre dimensions onto post dimensions,
    # cycling through the shorter index list if the dimensions differ
    dim_pre = pre.getDimension()
    dim_post = post.getDimension()
    t = [[0] * dim_pre for i in range(dim_post)]
    index_pre = range(dim_pre)
    index_post = range(dim_post)
    for i in range(max(len(index_pre), len(index_post))):
        ipre = index_pre[i % len(index_pre)]
        ipost = index_post[i % len(index_post)]
        t[ipost][ipre] = 1

    decoder = pre.getOrigin('X').getDecoders()
    encoder = post.getEncoders()
    encoder = MU.prod(encoder, 1.0 / post.getRadii()[0])
    weight = MU.prod(encoder, MU.prod(t, MU.transpose(decoder)))

    # random weight matrix to initialize projection from pre to post
    # def rand_weights(w):
    #     for i in range(len(w)):
    #         for j in range(len(w[0])):
    #             w[i][j] = random.uniform(-1e-3, 1e-3)
    #     return w
    # weight = rand_weights(numeric.zeros((post.neurons,
    #                                      pre.neurons)).tolist())

    # non-decoded termination (to learn transformation)
    count = 0
    prename = pre.getName()
    while '%s_%02d' % (prename, count) in [term.name
                                           for term in post.terminations]:
        count = count + 1
    prename = '%s_%02d' % (prename, count)

    post.addBCMTermination(prename, weight, 0.005, False, None)

    # add projection
    net.connect(pre.getOrigin('AXON'), post.getTermination(prename))

    # set learning rule on the non-decoded termination
    net.learn(post, prename, None, rate=rate)

    if net.network.getMetaData("bcmterm") is None:
        net.network.setMetaData("bcmterm", HashMap())
    bcmterms = net.network.getMetaData("bcmterm")

    bcmterm = HashMap(4)
    bcmterm.put("preName", preName)
    bcmterm.put("postName", postName)
    bcmterm.put("rate", rate)
    bcmterms.put(prename, bcmterm)

    if net.network.getMetaData("templates") is None:
        net.network.setMetaData("templates", ArrayList())
    templates = net.network.getMetaData("templates")
    templates.add(prename)

    if net.network.getMetaData("templateProjections") is None:
        net.network.setMetaData("templateProjections", HashMap())
    templateproj = net.network.getMetaData("templateProjections")
    templateproj.put(preName, postName)
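# A quick look at the cyclic dimension-mapping transform built above: when
# pre and post have different dimensions, indices wrap around so every
# dimension on the longer side still gets connected somewhere.
dim_pre, dim_post = 2, 3
t = [[0] * dim_pre for _ in range(dim_post)]
for i in range(max(dim_pre, dim_post)):
    t[i % dim_post][i % dim_pre] = 1
for row in t:
    print(row)
# -> [1, 0]
#    [0, 1]
#    [1, 0]   third post dimension wraps back to the first pre dimension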
componentRMS = math.sqrt(1.0 / len(frequencies))
signal = FourierFunction(frequencies,
                         MU.uniform(1, len(frequencies),
                                    componentRMS / .707)[0],
                         MU.random(1, len(frequencies),
                                   IndicatorPDF(-.5, .5))[0])
noiseBandwidth = 500

for network in networks:
    network.setMode(SimulationMode.DIRECT)
    network.setStepSize(.0005)

    signalPower = []
    noisePower = []
    for t in tau:
        network.setTau(t)

        network.setInputFunction(signal)
        network.clearErrors()
        network.reset(0)
        network.run(0, 10)
        signalPower.append(MU.variance(
            MU.prod(network.getOutputData().getValues(), [1]), 0))

        network.setInputFunction(ConstantFunction(1, 0))
        network.setNoise(1000, 1000)
        network.reset(0)
        network.run(0, 10)
        network.clearErrors()
        noisePower.append(MU.variance(
            MU.prod(network.getOutputData().getValues(), [1]), 0))

    Plotter.plot(tau, signalPower, "%s signal power" % network.getName())
    Plotter.plot(tau, noisePower, "%s noise power" % network.getName())

    network.setStepSize(.001)
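# A tiny sketch of the power measurement above: a zero-mean signal's
# power is its variance over time, so dividing signal power by noise
# power would give an SNR for each tau.
import math

def variance(xs):
    mean = sum(xs) / float(len(xs))
    return sum((x - mean) ** 2 for x in xs) / len(xs)

samples = [math.sin(2 * math.pi * t / 100.0) for t in range(1000)]
print(variance(samples))  # ~0.5, the power of a unit-amplitude sinusoid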
def termination_context(self, c, pstc=0.01):
    # pick the context whose vector is most similar (by normalized dot
    # product, i.e. cosine similarity) to the current context input
    self.context = max(self.contexts,
                       key=lambda x: MU.prod(HRLutils.normalize(c),
                                             HRLutils.normalize(x[1])))
def __init__(self, name, N, stateN, actions, learningrate, Qradius=1.0,
             init_decoders=None):
    """Build ActionValues network.

    :param name: name of Network
    :param N: base number of neurons
    :param stateN: number of neurons in state population
    :param actions: actions available to the system
    :type actions: list of tuples (action_name,action_vector)
    :param learningrate: learning rate for PES rule
    :param Qradius: expected radius of Q values
    :param init_decoders: if specified, will be used to initialize the
        connection weights to whatever function is specified by decoders
    """

    self.name = name
    net = nef.Network(self, seed=HRLutils.SEED, quick=False)

    self.N = N
    self.learningrate = learningrate
    self.supervision = 1.0  # don't use the unsupervised stuff at all

    self.tauPSC = 0.007

    modterms = []
    learnterms = []

    # relays
    output = net.make("output", 1, len(actions), mode="direct")
    output.fixMode()

    for i, action in enumerate(actions):
        # create one population corresponding to each action
        act_pop = net.make("action_" + action[0], self.N * 4, 1,
                           node_factory=HRLutils.node_fac())
        act_pop.fixMode([SimulationMode.DEFAULT, SimulationMode.RATE])

        # add error termination; the modulatory transform selects one
        # dimension of the error signal
        modterm = act_pop.addDecodedTermination(
            "error", [[0 if j != i else 1 for j in range(len(actions))]],
            0.005, True)

        # create learning termination (the comprehensions use fresh loop
        # variables so that the enclosing i is not clobbered in Python 2)
        if init_decoders is not None:
            weights = MU.prod(act_pop.getEncoders(),
                              MU.transpose(init_decoders))
        else:
            weights = [[random.uniform(-1e-3, 1e-3) for _ in range(stateN)]
                       for _ in range(act_pop.getNeurons())]
        learningterm = act_pop.addHPESTermination("learning", weights,
                                                  0.005, False, None)

        # initialize the learning rule
        net.learn(act_pop, learningterm, modterm, rate=self.learningrate,
                  supervisionRatio=self.supervision)

        # connect each action back to output relay
        # note, we learn all the Q values with radius 1, then just
        # multiply by the desired Q radius here
        net.connect(act_pop.getOrigin("X"), output,
                    transform=[[0] if j != i else [Qradius]
                               for j in range(len(actions))],
                    pstc=0.001)

        modterms += [modterm]
        learnterms += [learningterm]

    # use EnsembleTerminations to group the individual action terminations
    # into one multi-dimensional termination
    self.exposeTermination(EnsembleTermination(self, "state", learnterms),
                           "state")
    self.exposeTermination(EnsembleTermination(self, "error", modterms),
                           "error")

    self.exposeOrigin(output.getOrigin("X"), "X")
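# A short sketch of the error-selection transform above: each action
# population receives the full error vector, but its modulatory
# termination uses a one-hot row so only that action's error dimension
# drives learning.
def error_row(i, n_actions):
    return [[0 if j != i else 1 for j in range(n_actions)]]

error = [0.1, -0.3, 0.7]
for i in range(3):
    row = error_row(i, 3)[0]
    print(sum(w * e for w, e in zip(row, error)))  # 0.1, then -0.3, then 0.7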
from ca.nengo.util import MU
from java.io import File
import math

nInput = range(200, 2001, 400)
nDiff = 1000
networks = [interneuron, dualTC, adapting, depressing, butterworth,
            interneuronFeedback]

exporter = MatlabExporter()
for network in networks:
    network.setInputFunction(ConstantFunction(1, 0))
    network.setStepSize(.0001)
    network.setMode(SimulationMode.DIRECT)

    inputVariance = []
    outputVariance = []
    for n in nInput:
        network.setNoise(n, nDiff)
        # network.setDistortion(n, nDiff)
        network.reset(0)
        network.run(0, 10)
        inputVariance.append(MU.variance(
            MU.prod(network.getInputEnsembleData().getValues(), [1]), 0))
        outputVariance.append(MU.variance(
            MU.prod(network.getOutputData().getValues(), [1]), 0))
        network.clearErrors()

    Plotter.plot(nInput, outputVariance, "output")

exporter.write(File("noise.mat"))