-
Notifications
You must be signed in to change notification settings - Fork 0
/
runEvo.py
69 lines (47 loc) · 1.52 KB
/
runEvo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#import the RL libraries
from scipy import *
import sys, time,pickle
from PygameEnvironment import TwentyFortyEightEnvironment
from pybrain.tools.shortcuts import buildNetwork
from pybrain.optimization import GA #HillClimber
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments import Task
# set the learning time: number of GA generations to run
learning_eps = 150
# set the batch size: games evaluated per generation
games_per_ep = 75
population_size = 150

# make the environment (the 2048 game wrapped as a PyBrain environment)
environment = TwentyFortyEightEnvironment()
# each game contributes 1/(population * games-per-generation) when the
# environment accumulates its running mean score
environment.meansize = 1.0 / (population_size * games_per_ep)

# the environment doubles as the task this time (it implements the Task API)
task = environment

# create our network: senses -> 20 hidden units -> actions
controller = buildNetwork(task.nsenses, 20, task.nactions)

# use a Genetic Algorithm to evolve the network weights
# all the keyword arguments are options you can play with
learner = GA(populationSize=population_size,
             topProportion=0.05,
             elitism=True,
             eliteProportion=0.5,
             mutationProb=0.05,
             mutationStdDev=0.3,
             tournament=False,
             tournamentSize=2,
             )
agent = OptimizationAgent(controller, learner)

# set up an episodic experiment driving the agent on the task
experiment = EpisodicExperiment(task, agent)

# track the best evaluation seen after each generation for plotting
meanscores = []
for generation in range(learning_eps):  # range/print() work on Py2 and Py3 alike
    print(generation)
    experiment.doEpisodes(games_per_ep)
    meanscores.append(learner.bestEvaluation)

# BUG FIX: pickle data is binary, so the file must be opened in 'wb' mode --
# the original text-mode 'w' corrupts the stream on Windows (Python 2) and
# raises TypeError on Python 3. The context manager also guarantees the file
# is closed even if pickling raises.
with open("bestEvo.pkl", "wb") as f:
    pickle.dump(learner.bestEvaluable, f)

# plot the learning curve (imported lazily so training runs headless-friendly
# until the plot is actually needed)
import matplotlib.pyplot as plt
plt.plot(meanscores)
plt.title("Best Agent Score vs Generations:")
plt.show()