Example No. 1
def symmetryData(data):
    """Copy every episode of `data` into a fresh History container."""
    symData = History(data.stateDim, data.actionDim)

    for e in data:
        symData.appendEpisode(e)

    # disabled symmetry augmentation: would also add each episode
    # mirrored by negating its states and actions
    # for e in data:
    #     for s, a, r, _ in e:
    #         symData.append(-s, -a, r)
    #     symData.newEpisode()

    return symData
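
For context, a minimal usage sketch, assuming `History` is the toolkit's episode container seen throughout these examples (constructed from state and action dimensions, filled with `append(state, action, reward)` and closed with `newEpisode()`); the toy dimensions and values are made up:

from numpy import array

data = History(2, 1)                                 # 2-dimensional states, 1-dimensional actions
data.append(array([0.5, -0.2]), array([1.0]), 0.0)
data.append(array([0.1, 0.3]), array([-1.0]), 1.0)
data.newEpisode()

copied = symmetryData(data)                          # currently just a per-episode copy of data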
Example No. 2
    def evaluateEpisodes(self, count, reset=True, exploration=False, visualize=True):
        self.setup()
        
        # tell agent that evaluation is taking place
        self.agent.evaluation = True
    
        currNumEpisodes = len(self.agent.history)

        # disable all explorers and store them for later        
        if not exploration:
            explorers = []
            for a in self.adapters_:
                if isinstance(a, Explorer) and a.active:
                    explorers.append(a)
                    a.active = False
            
        # run the evaluation episodes; do not let them count toward
        # the experiment's episode counter
        self.runEpisodes(count, reset)
        self.numEpisodes -= count

        # copy the evaluation episodes into a new history
        # (the last, still-open episode stays with the agent)
        history = History(self.agent.history.stateDim, self.agent.history.actionDim)
        history.episodes_ = self.agent.history.episodes_[currNumEpisodes:-1]

        # remove the evaluation episodes from the agent's history, keeping
        # everything recorded before evaluation plus the open last episode
        self.agent.history.episodes_ = self.agent.history.episodes_[:currNumEpisodes] + [self.agent.history.episodes_[-1]]
        
        # enable exploration again if disabled before
        if not exploration:
            for a in explorers:
                a.active = True
        
        # tell agent that evaluation is over
        self.agent.evaluation = False

        if visualize:
            plt.ion()
            plt.clf()
            self.visual_x.append(self.numEpisodes)
            self.visual_y.append(mean([sum(e.rewards) for e in history]))
            plt.plot(self.visual_x, self.visual_y, 'o-', color='black')
            plt.gcf().canvas.draw()
                
        return history
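
A hedged sketch of how this evaluation method might be driven from an outer loop; `experiment` is a hypothetical instance of whatever class owns runEpisodes/evaluateEpisodes here, and `mean`/`plt` used above are assumed to be numpy.mean and matplotlib.pyplot imported at module level:

from numpy import mean

# alternate between training with exploration and greedy evaluation
for _ in range(20):
    experiment.runEpisodes(10)                                   # learn with exploration enabled
    evalHistory = experiment.evaluateEpisodes(5, visualize=False)
    print(mean([sum(e.rewards) for e in evalHistory]))           # average return per evaluation episode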
        
        
Example No. 3
def reduceData(data, buffer=4):
    """ Goes through the dataset and only stores the "interesting"
        samples: the ones around a change in reward.
    """
    reducedHistory = History(data.stateDim, data.actionDim)

    for episode in data:
        # indices where the reward changes, plus the first and last step
        rdiff = episode.rewards[1:] - episode.rewards[:-1]
        transitions = set([0, len(episode.rewards) - 1])
        transitions.update(where(rdiff != 0)[0])

        # expand every transition index by `buffer` steps on each side
        buf = set()
        for t in transitions:
            for i in range(max(0, t - buffer), min(t + 2 + buffer, len(episode.rewards))):
                buf.add(i)

        # copy the selected samples in temporal order
        for b in sorted(buf):
            reducedHistory.append(episode.states[b, :], episode.actions[b, :], episode.rewards[b])

        reducedHistory.newEpisode()

    return reducedHistory
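
To make the selection rule concrete, here is a standalone illustration of the reward-difference test and the +/- buffer window using plain numpy (no History involved); the reward sequence is made up:

from numpy import array, where

rewards = array([0., 0., 0., 1., 1., 0., 0., 0., 0., 0.])
rdiff = rewards[1:] - rewards[:-1]                 # nonzero at the two reward changes
transitions = {0, len(rewards) - 1} | set(where(rdiff != 0)[0])

buffer = 1
kept = set()
for t in transitions:
    kept.update(range(max(0, t - buffer), min(t + 2 + buffer, len(rewards))))

print(sorted(kept))                                # indices clustered around start, end, and reward changes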