import numpy as np
import scipy.spatial.distance

import inputs


def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = inputs.get_obs(self.obs, self.t)
    # self.x = np.append(self.x_sens, 1.0)  # add bias unit
    self.x = self.x_sens
    # sensory input is mapped onto a latent variable
    # x -> l (sigmoid layer)
    l_in = self.W_lx.dot(self.x)
    self.l_sens = self.transfer(l_in)  # apply sigmoid transformation
    # self.l_sens = l_in
    # Compute match value - S equal in each block!
    # (reshape so each projected block is compared to its own memory block)
    Snew = self.W_Sl.dot(self.l_sens).reshape((self.nblocks, self.block_size))
    Sold = self.S.reshape((self.nblocks, self.block_size))
    m = np.zeros(self.nblocks)
    for aa in range(self.nblocks):
        m[aa] = 1 - WorkMATe.match(Snew[aa, :], Sold[aa, :])
    # add match nodes + bias to latent input vector
    self.l = np.r_[self.l_sens, m, 1.0]
    return
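# --- Hedged sketch (not part of the original source) ------------------
# 'WorkMATe.match' is used above as a distance-like metric between a
# projected block and a stored memory block, but its definition is not
# included in this excerpt. A minimal, self-contained sketch of a
# compatible metric, assuming a cosine distance rescaled to [0, 1]
# (the name 'match_sketch' and this formulation are assumptions, not
# the original implementation):

def match_sketch(u, v):
    """Cosine distance between vectors u and v, rescaled to [0, 1]."""
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    if denom == 0.0:
        return 1.0  # treat an all-zero vector as a complete mismatch
    return (1.0 - u.dot(v) / denom) / 2.0

# With this convention, 1 - match_sketch(u, v) is a similarity in
# [0, 1], which is what the match nodes above encode.
# -----------------------------------------------------------------------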
def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = inputs.get_obs(self.obs, self.t)
    # self.x = np.append(self.x_sens, 1.0)  # add bias unit
    # works faster without bias unit
    # random input encoding
    self.l_in = self.W_lx.dot(self.x_sens)
    self.l_sens = l_sens = self.l_in
    # self.l_sens = l_sens = self.transfer_r(self.l_in)
    # learns a lot better without sigmoid activation
    # Compute match value:
    Sproj = self.W_Sl.dot(self.l_sens).reshape(
        (self.nblocks, self.block_size))
    matches = 1 - scipy.spatial.distance.cdist(
        Sproj, self.S.reshape(Sproj.shape),
        metric=WorkMATe.match).diagonal()
    # add match nodes + bias to input vector
    self.l = np.r_[l_sens, matches, 1.0]
    return
def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = inputs.get_obs(self.obs, self.t)
    self.x = np.append(self.x_sens, 1.0)  # add bias neuron
    # x -> l (sigmoid layer)
    l_in = self.W_lx.dot(self.x)
    self.l_sens = l_sens = self.transfer(l_in)
    # Compute match value:
    Sproj = self.W_Sl.dot(l_sens).reshape((self.nblocks, self.block_size))
    # Sproj = self.W_Sx.dot(x_sens).reshape((self.nblocks, self.block_size))
    # this could be done much neater than it is done now..
    matches = 1 - scipy.spatial.distance.cdist(
        Sproj, self.S.reshape(Sproj.shape),
        metric=WorkMATe.match).diagonal()
    # add match nodes + bias to input vector
    self.l = np.r_[l_sens, matches, 1.0]
    return
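# --- Note on the cdist idiom (sketch, not from the original) ----------
# cdist with a callable metric builds the full nblocks x nblocks
# distance matrix between projected blocks and stored memory blocks;
# taking .diagonal() then keeps only the paired comparisons (block i of
# the projection vs. block i of memory). An equivalent, more direct
# formulation that avoids computing the off-diagonal entries:
#
#   Sold = self.S.reshape(Sproj.shape)
#   matches = 1 - np.array([WorkMATe.match(Sproj[i], Sold[i])
#                           for i in range(self.nblocks)])
# -----------------------------------------------------------------------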
def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = inputs.get_obs(self.obs, self.t)
    self.x = np.r_[self.x_sens, self.bias]  # only bias included
    return
def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = x_sens = inputs.get_obs(self.obs, self.t)
    # Compute match value:
    Sproj = self.W_Sx.dot(x_sens).reshape((self.nblocks, self.block_size))
    matches = 1 - scipy.spatial.distance.cdist(
        Sproj, self.S.reshape(Sproj.shape),
        metric=WorkMATe.match2).diagonal()
    # add match nodes + bias to input vector
    self.x = np.r_[x_sens, matches, 1.0]
    return
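# --- Note (sketch, not from the original source) -----------------------
# This variant skips the latent layer entirely: raw sensory input is
# matched against memory through W_Sx ('WorkMATe.match2' is a second
# metric variant, not defined in this excerpt). For the reshape above to
# work, W_Sx must map the input onto all memory units; assuming the
# attribute names used in this file:
#
#   assert self.W_Sx.shape == (self.nblocks * self.block_size,
#                              self.x_sens.size)
# -----------------------------------------------------------------------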
def construct_input(self):
    """ Turn obs into a vector; uses coding defined in 'inputs.py' """
    # input consists of: observation and time t
    self.x_sens = inputs.get_obs(self.obs, self.t)
    self.x = np.append(self.x_sens, 1.0)  # add bias unit
    # sensory input is mapped onto a latent variable
    # x -> l (sigmoid layer)
    l_in = self.W_lx.dot(self.x)
    self.l_sens = self.transfer(l_in)
    # Compute match value:
    Sproj = self.W_Sl.dot(self.l_sens).reshape(
        (self.nblocks, self.block_size))
    matches = 1 - scipy.spatial.distance.cdist(
        Sproj, self.S.reshape(Sproj.shape),
        metric=WorkMATe.match).diagonal()
    # add match nodes + bias to latent input vector
    self.l = np.r_[self.l_sens, matches, 1.0]
    return
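# --- Worked shape example (illustrative, not from the original) --------
# For the variant above, with nx = x_sens.size, nl = block_size and
# nblocks memory blocks:
#   self.x   : (nx + 1,)            sensory input + bias
#   l_in     : (nl,)                from W_lx of shape (nl, nx + 1)
#   Sproj    : (nblocks, nl)        one projected copy per memory block
#   matches  : (nblocks,)           1 - distance, one node per block
#   self.l   : (nl + nblocks + 1,)  latent + match nodes + bias
# which agrees with W_hl of shape (nh, nl + nblocks + 1) in __init__.
# -----------------------------------------------------------------------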
def __init__(self, env=None, nhidden=20, nblocks=2, block_size=20):
    super(WorkMATe, self).__init__()
    assert env is not None
    self.env = env

    ## learning params (adopted from Rombouts et al., 2015)
    self.beta = 0.15
    # self.beta2 = 0.015
    self.gamma = 0.90
    self.L = 0.8
    # exploration:
    self.epsilon = 0.025
    self.bias = 1

    ## member lambda functions:
    # sigmoid transfer function, offset at 2.5
    sigmoid_offset = 2.5
    self.transfer = lambda x: 1 / (1. + np.exp(sigmoid_offset - x))
    self.dtransfer = lambda x: x * (1. - x)  # derivative
    # ReLU activation function
    self.transfer_r = lambda x: np.maximum(x, 0)
    self.dtransfer_r = lambda x: np.greater(x, 0).astype(int)
    # tanh activation function
    tan = 2.5
    self.transfer_t = lambda x: np.tanh(x - tan)
    self.dtransfer_t = lambda x: 1 - np.tanh(x - tan) ** 2
    # softmax normalization; for action selection - Boltzmann controller
    self.softmaxnorm = lambda x: (np.exp(x - x.max()) /
                                  np.exp(x - x.max()).sum())

    ## init network architecture -- inputs and output shape from env
    # input and hidden
    nx = inputs.get_obs('a').size
    nl = block_size
    nh = nhidden
    # memory cell properties:
    self.nblocks = nblocks
    self.block_size = block_size
    nS = nblocks * block_size
    # output -- q layer consisting of 2 modules:
    # module for n external actions, internal actions for nblocks + 1 (null)
    mod_sz = env.n_actions, nblocks + 1
    nq = np.sum(mod_sz)
    # indices of module for each node:
    self.zmods = np.hstack([[i] * sz for i, sz in enumerate(mod_sz)])

    ## init network layers (activations 0)
    # (x will be constructed when processing 'new_obs')
    self.S = np.zeros(nS)
    self.l = np.zeros(nl)
    self.h = np.zeros(nh)
    self.q = np.zeros(nq)

    ## init weights, tags, traces (+1 indicates projection from bias node)
    wl, wh = -.5, .5
    # Input projection with bias node
    self.W_lx = np.random.sample((nl, nx + 1)) * (wh - wl) + wl
    self.W_lx_start = np.copy(self.W_lx)
    # Memory projection (x > S)
    self.W_Sx = np.random.sample((nS, nx)) * (wh - wl) + wl
    # Note that time and sensory input cells are not separated in memory

    # PLASTIC CONNECTIONS (all except memory projection)
    wl, wh = -.5, .5
    # connections l -> h; nl + match nodes + bias
    self.W_hl = np.random.sample((nh, nl + nblocks + 1)) * (wh - wl) + wl
    self.W_hl_start = np.copy(self.W_hl)
    # connections S -> h
    self.W_hS = np.random.sample((nh, nS)) * (wh - wl) + wl
    # connections h -> q:
    self.W_qh = np.random.sample((nq, nh + 1)) * (wh - wl) + wl

    # tags are shaped like weights but initialized at 0:
    zeros_ = np.zeros_like
    self.Tag_W_lx, self.Trace_W_lx = zeros_(self.W_lx), zeros_(self.W_lx)
    self.Tag_W_hl, self.Trace_W_hl = zeros_(self.W_hl), zeros_(self.W_hl)
    self.Tag_W_hS, self.Trace_W_hS = zeros_(self.W_hS), zeros_(self.W_hS)
    self.Tag_W_qh, self.Trace_W_qh = zeros_(self.W_qh), zeros_(self.W_qh)

    # Init action state
    self.action = -1
    # (prev) predicted reward:
    self.qat_1 = self.qat = None
    self.t = 0
    return
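# --- Hedged sketch (not part of the original source) -------------------
# How epsilon and softmaxnorm above are typically combined in a
# max-Boltzmann controller (the action-selection scheme referenced from
# Rombouts et al., 2015): act greedily per q-module, but with
# probability epsilon sample an action from the softmax over q-values.
# The function name and 'agent' parameter below are illustrative; the
# actual selection method is not shown in this excerpt.

def select_action_sketch(agent):
    """Max-Boltzmann controller sketch: one winner per q-module."""
    z = np.zeros_like(agent.q)
    for mod in np.unique(agent.zmods):
        idx = np.where(agent.zmods == mod)[0]
        if np.random.rand() < agent.epsilon:
            p = agent.softmaxnorm(agent.q[idx])  # Boltzmann probabilities
            choice = np.random.choice(idx, p=p)  # explore
        else:
            choice = idx[np.argmax(agent.q[idx])]  # exploit
        z[choice] = 1.0
    return z
# -----------------------------------------------------------------------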
def __init__(self, env=None, nhidden=20):
    super(PreMATe, self).__init__()
    assert env is not None
    self.env = env

    ## learning params (adopted from Rombouts et al., 2015)
    self.beta = 0.4
    self.gamma = 0.90
    self.L = 0.8
    # exploration rate
    self.epsilon = 0.025
    self.bias = 1

    ## member lambda functions:
    # sigmoid transfer function, offset at 2.5
    sigmoid_offset = 2.5
    self.transfer = lambda x: 1 / (1. + np.exp(sigmoid_offset - x))
    self.dtransfer = lambda x: x * (1. - x)  # derivative
    # softmax normalization; for action selection - Boltzmann controller
    self.softmaxnorm = lambda x: (np.exp(x - x.max()) /
                                  np.exp(x - x.max()).sum())

    ## init network architecture -- inputs and output shape from env
    # input and hidden
    nx = inputs.get_obs('a').size
    nl = nhidden
    nh = nhidden
    # output -- q layer: one node per stimulus to be discriminated
    nq = len(all_stim)

    ## init network layers (activations 0)
    # (x will be constructed when processing 'new_obs')
    self.l = np.zeros(nl)
    self.h = np.zeros(nh)
    self.q = np.zeros(nq)

    ## init weights, tags, traces (+1 indicates projection from bias node)
    # ALL PLASTIC CONNECTIONS
    wl, wh = -.5, .5
    # Input projection with bias node: connections x -> l
    self.W_lx = np.random.sample((nl, nx + 1)) * (wh - wl) + wl
    self.W_lx_start = np.copy(self.W_lx)
    # connections l -> h; nl + bias (PreMATe has no match nodes)
    self.W_hl = np.random.sample((nh, nl + 1)) * (wh - wl) + wl
    self.W_hl_start = np.copy(self.W_hl)
    # connections h -> q:
    self.W_qh = np.random.sample((nq, nh + 1)) * (wh - wl) + wl
    self.W_qh_start = np.copy(self.W_qh)

    # tags are shaped like weights but initialized at 0:
    zeros_ = np.zeros_like
    self.Tag_W_lx, self.Trace_W_lx = zeros_(self.W_lx), zeros_(self.W_lx)
    self.Tag_W_hl, self.Trace_W_hl = zeros_(self.W_hl), zeros_(self.W_hl)
    self.Tag_W_qh, self.Trace_W_qh = zeros_(self.W_qh), zeros_(self.W_qh)

    # Init action state
    self.action = -1
    # (prev) predicted reward:
    self.qat_1 = self.qat = None
    self.t = 0
    return
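# --- Worked example (illustrative, not from the original) --------------
# Note that dtransfer takes the *activation* y = transfer(x) as its
# argument, not the pre-activation: for a logistic sigmoid, dy/dx =
# y * (1 - y) regardless of the offset. A quick numerical check of that
# identity:
#
#   x = 1.7
#   y = 1 / (1 + np.exp(2.5 - x))                       # transfer(x)
#   analytic = y * (1 - y)                              # dtransfer(y)
#   numeric = (1 / (1 + np.exp(2.5 - (x + 1e-6))) - y) / 1e-6
#   assert abs(analytic - numeric) < 1e-4
# -----------------------------------------------------------------------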
def __init__(self, env=None, nhidden=15, nblocks=2, block_size=15):
    super(WorkMATe, self).__init__()
    assert env is not None
    self.env = env

    ## learning params (adopted from Rombouts et al., 2015)
    self.beta = 0.15
    self.gamma = 0.90
    self.L = 0.8
    # exploration:
    self.epsilon = 0.025

    ## member lambda functions:
    # sigmoid transfer function, offset at 2.5
    sigmoid_offset = 2.5
    self.transfer = lambda x: 1 / (1. + np.exp(sigmoid_offset - x))
    self.dtransfer = lambda x: x * (1. - x)  # derivative
    # softmax normalization; for action selection - Boltzmann controller
    self.softmaxnorm = lambda x: (np.exp(x - x.max()) /
                                  np.exp(x - x.max()).sum())

    ## init network architecture -- inputs and output shape from env
    # input, latent and hidden
    nx = inputs.get_obs('a').size
    nl = block_size  # latent layer has the size of one memory block
    nh = nhidden
    # memory cell properties:
    self.nblocks = nblocks
    self.block_size = block_size
    nS = nblocks * block_size
    # output -- q layer consisting of 2 modules:
    # module for n external actions, internal actions for nblocks + 1 (null)
    mod_sz = env.n_actions, nblocks + 1
    nq = np.sum(mod_sz)
    # indices of module for each node:
    self.zmods = np.hstack([[i] * sz for i, sz in enumerate(mod_sz)])

    ## init network layers (activations 0)
    # (x will be constructed when processing 'new_obs')
    self.l = np.zeros(nl)
    self.S = np.zeros(nS)
    self.h = np.zeros(nh)
    self.q = np.zeros(nq)

    ## init weights, tags, traces (+1 indicates projection from bias node)
    wl, wh = -.50, .50
    # Input projection (x > l); nx + bias
    self.W_lx = np.random.sample((nl, nx + 1)) * (wh - wl) + wl
    # Memory projection (l > S)
    # Note that time and sensory input cells are not separated in memory.
    # This projection is not random but a fixed one-on-one mapping:
    # one identity matrix stacked per memory block.
    W_Sl = np.identity(nl)
    for i in range(nblocks - 1):
        W_Sl = np.vstack((W_Sl, np.identity(nl)))
    self.W_Sl = W_Sl

    # PLASTIC CONNECTIONS (all except memory projection)
    wl, wh = -.25, .25
    # connections l -> h; nl + match nodes + bias
    self.W_hl = np.random.sample((nh, nl + nblocks + 1)) * (wh - wl) + wl
    # connections S -> h:
    self.W_hS = np.random.sample((nh, nS)) * (wh - wl) + wl
    # connections h -> q:
    self.W_qh = np.random.sample((nq, nh + 1)) * (wh - wl) + wl

    # tags are shaped like weights but initialized at 0:
    zeros_ = np.zeros_like
    self.Tag_W_hl, self.Trace_W_hl = zeros_(self.W_hl), zeros_(self.W_hl)
    self.Tag_W_hS, self.Trace_W_hS = zeros_(self.W_hS), zeros_(self.W_hS)
    self.Tag_W_qh, self.Trace_W_qh = zeros_(self.W_qh), zeros_(self.W_qh)
    # ADDED BY LJC
    self.Tag_W_Sl, self.Trace_W_Sl = zeros_(self.W_Sl), zeros_(self.W_Sl)

    # Init action state
    self.action = -1
    # (prev) predicted reward:
    self.qat_1 = self.qat = None
    self.t = 0
    return
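# --- Hedged sketch (not part of the original source) -------------------
# The fixed memory projection above stacks one identity matrix per
# block, so W_Sl.dot(l) simply repeats the latent vector once per
# memory block. An equivalent one-liner and a small check (illustrative;
# variable names are local to this sketch):
#
#   W_Sl = np.vstack([np.identity(nl)] * nblocks)  # same as the loop
#   l = np.arange(nl, dtype=float)
#   assert np.allclose(W_Sl.dot(l), np.tile(l, nblocks))
# -----------------------------------------------------------------------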