/
learnerCustom.py
30 lines (23 loc) · 1.29 KB
/
learnerCustom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# A modified version of LearningAgent
# The idea is to make the best choice based on multiple outputs from a neural network
# The original code only supports self.module.activate returning a single value, an action
from pybrain.rl.agents import LearningAgent
from pybrain.rl.agents.logging import LoggingAgent
class CustLearningAgent(LearningAgent):
    """LearningAgent variant that selects the best of several module outputs.

    When the module returns more than one value for an observation, the
    values are treated as per-action scores and the index of the largest one
    becomes the action. When it returns a single value, that value is used as
    the action directly (e.g. a table lookup that already yields an action).
    """

    def getAction(self):
        """Activate the module with the last observation, add the exploration
        from the explorer object and store the result as last action.

        Returns:
            The chosen action, also stored in ``self.lastaction``. It is a
            one-element list holding the argmax index when the module
            produced several values, otherwise the module's output as-is.
            If ``self.learning`` is true, it is whatever
            ``self.learner.explore`` returns for that action.
        """
        LoggingAgent.getAction(self)

        # Local import, as in the original: the file header does not import
        # numpy at module level.
        from numpy import argmax

        # Activate the module exactly once and reuse the result in both
        # branches. A second activate() call would run the module again,
        # which is wasteful and, for modules that keep internal state
        # between activations, could yield a different value.
        values = self.module.activate(self.lastobs)

        if values.size > 1:
            # Several outputs: one value per action, choose the action with
            # the highest value.
            self.lastaction = [argmax(values)]
        else:
            # Single output: the module already returned the action itself
            # (used still for stuff like table lookup).
            self.lastaction = values

        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        return self.lastaction