# Outcome bookkeeping and learning update for trial `iTrial`.
# NOTE(review): this reads like the tail of a per-trial loop body whose header
# lies outside this chunk -- confirm the intended indentation when merging.
# The statements had been collapsed onto a single line, so everything after
# the first `#` was parsed as a comment; they are restored here as individual
# statements in their original order.
mySession.loc[iTrial, 'waitingTime'] = k

# R
# A trial is rewarded only if it is correct, is not a catch trial, and the
# waiting time k exceeds f.
mySession.loc[iTrial, 'isRewarded'] = (
    mySession.loc[iTrial, 'isCorrect']
    and not mySession.loc[iTrial, 'isCatch']
    and k > f
)
r = float(mySession.loc[iTrial, 'isRewarded'])
mySession.loc[iTrial, 'Reward'] = rewMag * r

# The trial duration is the waiting time k regardless of outcome.
# Disabled alternative: f if mySession.loc[iTrial, 'isRewarded'] else k
mySession.loc[iTrial, 'trialDur'] = k
# mySession.loc[iTrial, 'trialDur'] = mySession.loc[iTrial, 'trialDur'] + ITI
tau = mySession.loc[iTrial, 'trialDur'] + ITI  # + 1e-9

# S'
# A'
# Both increments are evaluated with the pre-update values of rhoHat and l,
# then applied with their own learning rates. grad_rho / grad_l are defined
# elsewhere in the file.
delta_rho = grad_rho(rhoHat, r, tau)
delta_l = grad_l(l, r, k, betaHat)
rhoHat = rhoHat + learnRateRho * delta_rho
l = l + learnRateB * delta_l
rhoHat = max(1e-6, rhoHat)  # floor rhoHat at 1e-6

# The updated estimates are stored on the row of the next trial.
mySession.loc[iTrial + 1, 'rho'] = rhoHat
mySession.loc[iTrial + 1, 'beta'] = betaHat
L_hat.loc[x] = l

# Sanity checks: the current trial's row must be free of NaNs (the message
# names the offending columns) and rhoHat must be non-negative.
nanMask = np.isnan(mySession.loc[iTrial, :].values.astype(float))
assert not nanMask.any(), "nan found in {}".format(
    mySession.columns[nanMask].values)
assert rhoHat >= 0, "rhoHat < 0 (={:0.2f})".format(rhoHat)
iTrial += 1
# Outcome bookkeeping and learning update for trial `iTrial`.
# NOTE(review): this reads like the tail of a per-trial loop body whose header
# lies outside this chunk, followed by the start of the plotting cell --
# confirm the intended indentation when merging. The statements had been
# collapsed onto a single line, so everything after the first `#` was parsed
# as a comment; they are restored here as individual statements in their
# original order.
mySession.loc[iTrial, 'waitingTime'] = k

# R
# A trial is rewarded only if it is correct, is not a catch trial, and the
# waiting time k exceeds d.
mySession.loc[iTrial, 'isRewarded'] = (
    mySession.loc[iTrial, 'isCorrect']
    and not mySession.loc[iTrial, 'isCatch']
    and k > d
)
r = float(mySession.loc[iTrial, 'isRewarded'])
mySession.loc[iTrial, 'Reward'] = m * r

# Trial duration: d on rewarded trials, the waiting time k otherwise, plus ITI.
# Disabled one-line variant kept for reference:
# tau = ITI + r*d * (1-r)*k
# NOTE(review): that variant multiplies the two terms, whereas the active
# branch below corresponds to ITI + r*d + (1-r)*k for r in {0, 1}.
tau = d if mySession.loc[iTrial, 'isRewarded'] else k
tau = tau + ITI
mySession.loc[iTrial, 'trialDur'] = tau

# S'
# A'
# grad_rho is defined elsewhere in the file.
delta_rho = grad_rho(rho=rhoHat, r=r, tau=tau, m=m)
# Disabled fixed-step variant:
# rhoHat = max(1e-6, rhoHat + learnRateRho * delta_rho)
# Active variant: the step size is 1 - (1 - learnRateRho)**tau, i.e. it
# depends on the trial duration; the result is floored at 1e-6.
rhoHat = max(1e-6, rhoHat + (1 - (1 - learnRateRho) ** (tau)) * delta_rho)
# The updated estimate is stored on the row of the next trial.
mySession.loc[iTrial + 1, 'rho'] = rhoHat

# Sanity check: the current trial's row must be free of NaNs (the message
# names the offending columns).
nanMask = np.isnan(mySession.loc[iTrial, :].values.astype(float))
assert not nanMask.any(), "nan found in {}".format(
    mySession.columns[nanMask].values)
iTrial += 1
# break

# with open('mod01_cuedPb_nlog10Eta_{:2.1f}.pickle'.format(-np.log10(learnRateRho)),'wb') as fhandle:
#     pickle.dump(mySession,fhandle,-1)

#%% Plotting
# Flag rows whose index falls in the first half of the session, then keep the
# remaining rows, dropping any row that contains a NaN.
mySession.loc[:, 'early'] = mySession.index < nTrials / 2
mySessionLate = mySession.loc[np.logical_not(mySession.early), :].dropna()