# trainEuphoria.py
from scipy import * #@UnusedWildImport
import pylab
import random
import csv
from euphoria import EuphoriaGame
from euphoriatask import EuphoriaTask
from euavnetwork import euActionValueNetwork
from eulearning import EuphoriaLearningAgent
from euphoriarandplayer import EuphoriaRandomPlayer
from pybrain.rl.learners import Q, SARSA #@UnusedImport
from eunfq import NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
import timeit
# ---------------------------------------------------------------------------
# Experiment wiring.
# The game environment is handed to the random opponent, the opponent to the
# task, and the task plus the learning agent to the episodic experiment.
# Construction order is kept exactly as in the original script.
# ---------------------------------------------------------------------------
environment = EuphoriaGame()
# NOTE(review): 582 and 113 are presumably the network's input and action
# dimensions -- confirm against euavnetwork.euActionValueNetwork.
controller = euActionValueNetwork(582, 113)
learner = NFQ()
agent = EuphoriaLearningAgent(controller, learner)
agentOp = EuphoriaRandomPlayer(environment)
task = EuphoriaTask(agentOp)
experiment = EpisodicExperiment(task, agent)

# ---------------------------------------------------------------------------
# Timed training loop.
# `reward` is never cleared, so it accumulates across iterations; with the
# current bound (i < 1) the loop body runs exactly once.
# ---------------------------------------------------------------------------
i = 0
reward = []
while i < 1:
    started = timeit.default_timer()

    # Run three episodes and keep the last element of each returned item
    # (presumably the final reward of that episode -- TODO confirm).
    r = experiment.doEpisodes(3)
    reward.extend([ri[-1] for ri in r])

    # Dump everything collected so far as a single CSV row.
    # 'wb' is the mode the Python 2 csv module expects.
    # NOTE(review): the original indentation was lost; the write is assumed to
    # happen once per iteration, after all episodes have been collected.
    with open('rewardList_' + str(i) + '.csv', 'wb') as out_file:
        csv.writer(out_file).writerow(reward)

    # NOTE: explicit agent.learn() / agent.reset() calls and the debugging
    # prints of `reward` were commented out in the original and stay disabled.

    elapsed = timeit.default_timer() - started
    # Elapsed time in seconds. Parenthesised so the line parses under both
    # Python 2 and Python 3; the printed output is unchanged.
    print(elapsed)

    i += 1