Example No. 1
def showAverageQValues(p, v, nEpisodes, path):
    '''Colorplot for the average QValues'''
    lrAgent = loadEpisodeVar(p, v, 0, path, 'lrAgent')
    ny, nx = shape(lrAgent.x)
    avgValues = zeros((ny, nx))
    for e in xrange(nEpisodes):
        if e != 0: lrAgent = loadEpisodeVar(p, v, e, path, 'lrAgent')
        avgValues += lrAgent.x


    # pdb.set_trace()
    avgValues = avgValues / nEpisodes
    figure()
    title('Average Q-values over the trials - p=' + str(p) + ' v=' + str(v))

    dlbd = lrAgent.lbd[1] - lrAgent.lbd[0]
    last = lrAgent.lbd[-1] + dlbd
    # pcolor treats X as column edges, so append one extra edge ("last");
    # otherwise the last column would not be drawn
    X = r_[lrAgent.lbd, last]
    Y = arange(ny + 1)
    Z = avgValues
    pcolor(X, Y, Z)
    colorbar()
    axis([lrAgent.lbd[0], last, 0, ny + 1])
    xlabel('Learning rates')
    ylabel('Trials')
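
pcolor interprets X and Y as cell edges, so with n data columns it needs n+1 x-values; the appended "last" edge above is what keeps the final learning-rate column visible. A minimal, self-contained sketch of that convention (hypothetical 3x4 data, pylab namespace assumed):

# illustration only: pcolor needs one more edge than data columns
from pylab import figure, pcolor, colorbar, arange, rand

Z = rand(3, 4)        # hypothetical data: 3 trials x 4 learning rates
X = arange(4 + 1)     # 5 x-edges for 4 columns
Y = arange(3 + 1)     # 4 y-edges for 3 rows
figure()
pcolor(X, Y, Z)       # all 4 columns are drawn because X has the extra edge
colorbar()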
Example No. 2
def showActions(p, v, epis, path):
    '''Plot agent actions'''
    figure()
    agent = loadEpisodeVar(p, v, epis, path, 'agent')
    plot(agent.actions, '.', label="action")
    title('Actions')
    ylim(-.05, 1.05)
    xlabel('Trials')
Example No. 3
def showAgentDynamics(prob, vol, epis, path):
    '''Plot agent's errors, rewards and learning rates from saved data'''
    agent = loadEpisodeVar(prob, vol, epis, path, 'agent')
    xlabel('Trials')
    plot(agent.err, '.', label="error")
    plot(agent.r, 'o', label="reward")
    plot(agent.lr, 'x', label="LR", markersize=5)
    ylim(ymax=1.1)
    legend()
Example No. 4
def showPredictions(prob, vol, epis, path):
    '''Temporal evolution of agent's prediction in comparison with the real value'''
    environment = loadArrangeVar(prob, vol, path, 'environment')
    agent = loadEpisodeVar(prob, vol, epis, path, 'agent')

    title('Environment and Predictions - p=' + str(prob) + ' v=' + str(vol) +
          ' episode ' + str(epis))
    xlabel('Trials')
    ylim(-.05, 1.05)
    plot(environment.history, label="probability")
    plot(agent.x, label="value")
    legend()
Example No. 5
def showQTrace(p, v, e, path):
    '''
   DEFINITION: plot Q-values in the last trial of the episode
   INPUTS:
   p: probability
   v: volatility
   e: episode
   path: location of the file
   '''
    lrAgent = loadEpisodeVar(p, v, e, path, 'lrAgent')
    figure()
    plot(lrAgent.x[-1], 'o')
    title('Q-values - p=' + str(p) + ' v=' + str(v))
    xlabel('Learning rates')
Example No. 6
def showAverageQTrace(p, v, nEpisodes, path):
    '''
   DEFINITION: plot the Q-values in the last trial averaged over all episodes
   INPUTS:
   p: probability
   v: volatility
   nEpisodes: number of episodes
   path: location of the file
   '''
    figure()
    for e in xrange(nEpisodes):
        lrAgent = loadEpisodeVar(p, v, e, path, 'lrAgent')
        if e == 0: allQs = array([lrAgent.x[-1]])
        else: allQs = r_[allQs, [lrAgent.x[-1]]]
    meanQ = genStat(allQs, mean)
    stdQ = genStat(allQs, std)
    errorbar(lrAgent.lbd, meanQ, yerr=stdQ, fmt='o')
    title('Averaged Q-values - p=' + str(p) + ' v=' + str(v))
    xlabel('Learning rates')
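
genStat is defined elsewhere in the project; from its use here it appears to apply the given statistic (mean, std) across episodes for each learning rate. A hedged plain-numpy equivalent, assuming that reading is correct:

import numpy as np

def column_stat(allQs, stat):
    # illustrative stand-in for genStat: apply stat over episodes (axis 0),
    # returning one value per learning rate (axis 1)
    return stat(np.asarray(allQs), axis=0)

# meanQ = column_stat(allQs, np.mean)
# stdQ  = column_stat(allQs, np.std)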
Example No. 7
def showQValues(p, v, epis, path):
    '''Colorplot for the QValues'''
    lrAgent = loadEpisodeVar(p, v, epis, path, 'lrAgent')
    figure()
    title('Q-values over the trials')

    dlbd = lrAgent.lbd[1] - lrAgent.lbd[0]
    ny, nx = shape(lrAgent.x)
    last = lrAgent.lbd[-1] + dlbd
    # pcolor treats X as column edges, so append one extra edge ("last");
    # otherwise the last column would not be drawn
    X = r_[lrAgent.lbd, last]
    Y = arange(ny + 1)

    Z = lrAgent.x
    pcolor(X, Y, Z)
    colorbar()
    axis([lrAgent.lbd[0], last, 0, ny + 1])
    xlabel('Learning rates')
    ylabel('Trials')
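
A hedged usage sketch of the plotting helpers above; the probability, volatility, episode and path values below are placeholders, and loadEpisodeVar/loadArrangeVar must be provided by the surrounding project:

from pylab import show

p, v, episode, path = 0.8, 20, 0, './results/'      # placeholder arguments
showQValues(p, v, episode, path)                    # colorplot for one episode
showAverageQValues(p, v, nEpisodes=10, path=path)   # average over 10 episodes
showQTrace(p, v, episode, path)                     # Q-values at the last trial
show()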
Example No. 8
lbdMean = np.zeros((len(prob), len(vol), nEpisodes))
lbdStd = np.zeros((len(prob), len(vol), nEpisodes))
meanSquareError = np.zeros((len(prob), len(vol), nEpisodes))
rightEstimate = np.zeros((len(prob), len(vol), nEpisodes))
rightPrediction = np.zeros((len(prob), len(vol), nEpisodes))
rewardedTrials = np.zeros((len(prob), len(vol), nEpisodes))
totalIter = len(prob) * len(vol) * nEpisodes

n = 1  # iteration counter
now = datetime.now()
print 'Simulation started. Date:', now.strftime("%Y-%m-%d %H:%M:%S")
for v in xrange(len(vol)):
    for p in xrange(len(prob)):
        environment = loadArrangeVar(prob[p], vol[v], path, 'environment')
        for e in xrange(nEpisodes):
            lrAgent = loadEpisodeVar(prob[p], vol[v], e, path, 'lrAgent')
            agent = loadEpisodeVar(prob[p], vol[v], e, path, 'agent')
            chosenLRs = [lrAgent.lbd[a] for a in lrAgent.actions]
            lbdMean[p][v][e] = mean(chosenLRs)
            lbdStd[p][v][e] = std(
                chosenLRs
            )  # a low std indicates convergence on a single (near-optimal) learning rate
            meanSquareError[p][v][e] = mean(lrAgent.r)
            # agent.x has nTrials+1 entries while history has nTrials: after the last
            # trial the agent still updates its estimate of x for the next trial
            rightEstimate[p][v][e] = np.sum(
                around(agent.x[1:]) == around(environment.history)
            ) / float(environment.history.size) * 100  # float() avoids Python 2 integer division
            rightPrediction[p][v][e] = np.sum(
                around(agent.x[0:-1]) == around(environment.history)
            ) / float(environment.history.size) * 100
            rewardedTrials[p][v][e] = float(
                np.sum(agent.r)