Example no. 1
	def training_seq_pred(self,N_trial,d,stop=True,verb=True,policy='eps_greedy',stoc='soft',t_weighted=True):
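		"""Train on the sequence-prediction task for at most N_trial trials.

		d parametrizes the task (it is forwarded to construct_trial and get_dictionary);
		policy, stoc and t_weighted are forwarded to compute_response. Only the response
		to the last element of each sequence is rewarded, and the weights are updated
		once per trial from the resulting prediction error. Returns the per-trial error
		vector E and the trial at which 100 consecutive correct trials were first
		reached (0 if convergence was never reached).
		"""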
	
		from TASKS.task_seq_prediction import construct_trial, get_dictionary

		zero = np.zeros((1,self.S))
		
		corr = 0
		conv_tr = 0
		E = np.zeros(N_trial)
		dic_stim, dic_resp = get_dictionary(d)
		convergence=False

		for tr in np.arange(N_trial):

			if verb:
				print('TRIAL N.',tr+1,':\t', end="")

			#self.reset_memory()
			#self.reset_tags()

			S, O = construct_trial(d,0.5)
			o_print = dic_resp[np.argmax(O)]
			s_old = zero
			
			for i in np.arange(np.shape(S)[0]):
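				# present one element of the sequence per time step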
				 
				s_inst = S[i:(i+1),:]
				s_trans = self.define_transient(s_inst, s_old)
				s_old = s_inst
				s_print =  dic_stim[np.argmax(s_inst)]

				y_r,y_m,Q = self.feedforward(s_inst, s_trans)
				
				resp_ind,P_vec = self.compute_response(Q,policy,stoc,t_weighted)
				q = Q[0,resp_ind]
		
				z = np.zeros(np.shape(Q))
				z[0,resp_ind] = 1
				r_print = dic_resp[resp_ind]

				if verb:
					 print(s_print,end="-")
				
				self.update_tags(s_inst,s_trans,y_r,y_m,z,resp_ind)
				q_old = q
				
				if i==np.shape(S)[0]-1:
					if dic_resp[resp_ind]==o_print:
						r = self.rew_pos
						corr += 1
					else:
						r = self.rew_neg
						E[tr] = 1
						corr = 0
				else:
					r = 0
			
			RPE = r - q_old  # Reward Prediction Error
			self.update_weights(RPE)
			
			if verb:
				print(dic_resp[resp_ind],'\t(',corr,')')

			if corr>=100 and not convergence:
				conv_tr = tr
				convergence=True
				if stop:
					break


		if conv_tr!=0:
			print('SIMULATION CONVERGED AT TRIAL ',conv_tr)		
		
		return E, conv_tr
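A minimal call sketch, assuming agent is an instance of whichever class defines training_seq_pred (the class itself is not part of this example); the argument values are hypothetical:

	# 'agent' and the argument values below are hypothetical placeholders
	E, conv_tr = agent.training_seq_pred(N_trial=10000, d=6, stop=True, verb=False)
	if conv_tr != 0:
		print('Converged after', conv_tr + 1, 'trials; mean error up to convergence:', E[:conv_tr].mean())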
Example no. 2
	def training_seq_pred_CPT(self,N_trial,d,stop=True,verb=True,policy='eps_greedy',stoc='soft',t_weighted=True,e_weighted=False):  # default values for the response-policy arguments are assumed
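		"""Train on the CPT variant of the sequence-prediction task for at most N_trial trials.

		Every element of the sequence has its own target response and reward; the weights
		are updated online at each step (SARSA-style, with discounted bootstrap) and once
		more at the end of the trial. policy, stoc, t_weighted and e_weighted are forwarded
		to compute_response; the default values given in the signature are assumptions.
		Returns the per-trial error vector E and the trial at which a run of 100
		consecutive correct responses was first reached (0 if convergence was never
		reached).
		"""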
	
		from TASKS.task_seq_prediction_CPT import construct_trial, get_dictionary

		zero = np.zeros((1,self.S))
		
		corr = 0
		conv_tr = 0
		E = np.zeros(N_trial)
		dic_stim, dic_resp = get_dictionary(d)
		convergence=False

		for tr in np.arange(N_trial):

			if verb:
				print('TRIAL N.',tr+1,':\t', end="")

			first = True

			#self.reset_memory()
			#self.reset_tags()

			S, O = construct_trial(d,0.5)
			s_old = zero
			
			for i in np.arange(np.shape(S)[0]):
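				# each element has its own target response and is rewarded immediately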
				 
				s_inst = S[i:(i+1),:]
				s_trans = self.define_transient(s_inst, s_old)
				s_old = s_inst
				s_print =  dic_stim[np.argmax(s_inst)]

				#print(self.sTRACE)

				o = O[i:(i+1),:]
				o_print = dic_resp[np.argmax(o)]

				y_r,y_m,Q = self.feedforward(s_inst, s_trans)
				
				resp_ind,P_vec = self.compute_response(Q,policy,stoc,t_weighted,e_weighted)
				q = Q[0,resp_ind]
				z = np.zeros(np.shape(Q))
				z[0,resp_ind] = 1
				r_print = dic_resp[resp_ind]

				if verb:
					if i==0:
						print(s_print,end="-")
						print(r_print,end="-")
					elif i==np.shape(S)[0]-1:
						print(r_print,'\t(',corr,')')
					else:
						print(r_print,end="-")
	
				if not first:
					RPE = (r + self.discount*q) - q_old  # SARSA-style Reward Prediction Error (discounted bootstrap)
					self.update_weights(RPE)
				else:
					first = False
				
				self.update_tags(s_inst,s_trans,y_r,y_m,z,resp_ind)
				q_old = q
				
				if r_print==o_print:
					final = (o_print==dic_resp[self.A-2] or o_print==dic_resp[self.A-1])  # True when the target is one of the last two responses in the dictionary
					r = self.positive_reward_seq_pred(final,self.A-2)
					corr += 1
				else:
					r = self.rew_neg
					E[tr] = 1
					corr = 0

				#print('\t',s_print," - ",r_print,' (',o_print,')','--->',r)
			
			RPE = r - q_old  # Reward Prediction Error
			self.update_weights(RPE)

			if corr>=100 and not convergence:
				conv_tr = tr
				convergence=True
				if stop:
					break

		if conv_tr!=0:
			print('SIMULATION CONVERGED AT TRIAL ',conv_tr)		
		
		return E, conv_tr
Example no. 3
    def training_tXOR(self, N_trial, stop=True, verb=True):
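        """Train on the temporal XOR task for at most N_trial trials.

        Each trial lasts three time steps: two input stimuli followed by a response step.
        Only the response at the third step is rewarded, and the weights are updated once
        per trial from the resulting prediction error. Returns the per-trial error vector
        E and the trial at which 100 consecutive correct trials were first reached (0 if
        convergence was never reached).
        """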

        from TASKS.task_tXOR import construct_trial

        zero = np.zeros((1, self.S))

        corr = 0
        conv_tr = 0
        E = np.zeros(N_trial)
        convergence = False

        for tr in np.arange(N_trial):

            if verb:
                print('TRIAL N.', tr + 1, ':\t', end="")

            self.reset_memory()
            self.reset_tags()

            S, O = construct_trial()
            o_print = self.dic_resp[np.argmax(O)]
            s_old = zero

            for i in np.arange(3):
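                # steps 0 and 1 present the two input stimuli; only the response at step 2 is rewarded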

                s_inst = S[i:(i + 1), :]
                s_trans = self.define_transient(s_inst, s_old)
                s_old = s_inst
                if i != 2:
                    s_print = self.dic_stim[np.argmax(s_inst)]

                y_r, y_m, Q = self.feedforward(s_inst, s_trans)

                resp_ind, _ = self.compute_response(Q)
                q = Q[0, resp_ind]

                z = np.zeros(np.shape(Q))
                z[0, resp_ind] = 1
                r_print = self.dic_resp[resp_ind]

                if verb and i != 2:
                    print(s_print, end="-")

                self.update_tags(s_inst, s_trans, y_r, y_m, z, resp_ind)
                q_old = q

                if i == 2:
                    if self.dic_resp[resp_ind] == o_print:
                        r = self.rew_pos
                        corr += 1
                    else:
                        r = self.rew_neg
                        E[tr] = 1
                        corr = 0
                else:
                    r = 0

            RPE = r - q_old  # Reward Prediction Error
            self.update_weights(RPE)

            if verb:
                print(self.dic_resp[resp_ind], '\t(', corr, ')')

            if corr >= 100 and not convergence:
                conv_tr = tr
                convergence = True
                if stop:
                    break

        if conv_tr != 0:
            print('SIMULATION CONVERGED AT TRIAL ', conv_tr)

        return E, conv_tr