def test6(): """ Now with memory!""" from numpy import ndarray from examples.gridphysics.mazes import polarmaze_game from pybrain.optimization import SNES g = VGDLParser().parseGame(polarmaze_game) g.buildLevel(cheese_maze) game_env = GameEnvironment(g) net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True) algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=6, maxSteps=30, exploretoo=False), net, verbose=True, desiredEvaluation=0.85) print algo.batchSize rows, cols = 2,3 episodesPerStep = 5 for i in range(rows*cols): pylab.subplot(rows, cols, i+1) algo.learn(episodesPerStep) if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable plotBackground(game_env) plotTrajectories(game_env, net) pylab.title(str((i+1)*episodesPerStep)) if algo.desiredEvaluation <= algo.bestEvaluation: break print pylab.show()
def test4():
    """Train a feed-forward controller on labyrinth2 with SNES, plotting the
    best trajectory after each batch of training episodes.

    Fix: dropped the unused `WeightGuessing` import and the commented-out dead
    line that referenced it.
    """
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(labyrinth2)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 5, 4, temperature=0.1, recurrent=False)
    algo = SNES(lambda x: someEpisodes(game_env, x, avgOver=3), net,
                verbose=True, desiredEvaluation=0.75)
    rows, cols = 2, 2
    episodesPerStep = 4
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        # SNES yields a parameter vector; other optimizers may yield a net.
        if isinstance(algo.bestEvaluable, ndarray):
            net._setParameters(algo.bestEvaluable)
        else:
            net = algo.bestEvaluable
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def test3():
    """Train a small feed-forward controller on the consistent corridor maze
    with SNES, plotting trajectories as training progresses."""
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(consistent_corridor)
    env = GameEnvironment(game)
    net = buildNet(env.outdim, 4, 4, temperature=0.05, recurrent=False)
    algo = SNES(lambda w: someEpisodes(env, w), net,
                verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 3
    for cell in range(rows * cols):
        pylab.subplot(rows, cols, cell + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str((cell + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def test2():
    """Train a controller on maze level 1 with SNES, recording episodes and
    plotting the best trajectory after each learning step."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES
    game_str, map_str = polarmaze_game, maze_level_1
    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)
    env = GameEnvironment(game, actionDelay=100, recordingEnabled=True)
    net = buildNet(env.outdim, 6, 2)
    algo = SNES(lambda w: someEpisodes(env, w), net,
                verbose=True, desiredEvaluation=0.43)
    rows, cols = 3, 3
    episodesPerStep = 2
    for cell in range(rows * cols):
        pylab.subplot(rows, cols, cell + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str((cell + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def evaulateGame(): global zelda_game, now_zelda_game, rules #buildNetWork net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer) #randomMove avg = 0 oldlink = rules[3] rules[3] = rules[3].replace('random=0', 'random=1') print rules[3] setRule(rules) print "randomPlay" for i in range(10): avg += evaluate(net) """ rules[3] = 'ShootNNSprite stype=sword israndom=1' setRule(rules) print "randomShootNNSprite" for i in range(20): avg += evaluate(net) rules[3] = 'ShootNNSprite stype=bullet israndom=1' setRule(rules) print "randomShootNNSprite" for i in range(20): avg += evaluate(net) """ if (avg / 10.0 > 0.4): return -1, net from pybrain.optimization import SNES from pybrain.optimization import OriginalNES from pybrain.optimization import GA from numpy import ndarray rules[3] = oldlink print "oldlink........." + oldlink setRule(rules) best = 0 print "SNES starting......" algo = SNES(lambda x: evaluate(x), net, verbose=True) #algo = GA(lambda x: evaluate(x), net, verbose=True) #algo = OriginalNES(lambda x: evaluate(x), net, verbose=True, desiredEvaluation=0.85) episodesPerStep = 5 for i in range(5): algo.learn(episodesPerStep) print net.params if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable if algo.bestEvaluation > best: best = algo.bestEvaluation """ if best < 0.1: #too hard return -1,net """ print now_zelda_game return best, net
def trainNetwork(data, n_classes, buildNet, file, seed, max_evaluations, num_samples):
    """Train a network with SNES, minimizing a multiclass hinge loss computed
    on a mini-batch of the training data that is resampled each generation.

    Calls testNetwork after every generation to log progress and returns the
    final best parameter vector.
    """
    X_train = data["X_train"]
    y_train = data["y_train"]

    def objF(params):
        # Fresh net per candidate; load this candidate's parameters into it.
        nn = buildNet(X_train.shape[1], n_classes)
        nn._setParameters(np.array(params))
        # Every candidate of a generation must see the same batch: seed with
        # the optimizer's step counter, sample, then restore the RNG state.
        saved_state = np.random.get_state()
        np.random.seed(optimizer.numLearningSteps)
        batch_idx = np.random.choice(len(X_train), num_samples, replace=False)
        np.random.set_state(saved_state)
        batch_X = X_train[batch_idx]
        batch_y = y_train[batch_idx]
        total_loss = 0
        for example, label in zip(batch_X, batch_y):
            output = nn.activate(example)
            # Multiclass hinge loss with margin 1 against the true class.
            hinge = 0
            for cls, score in enumerate(output):
                if cls != label:
                    hinge += max(0, score - output[int(label)] + 1)
            total_loss += hinge
            nn.reset()
        return total_loss

    # Report performance of the untrained (random) parameters first.
    learned = buildNet(X_train.shape[1], n_classes).params
    testNetwork(data, n_classes, learned, buildNet, 0, file, seed)
    optimizer = SNES(objF, learned, verbose=False)
    batch_size = optimizer.batchSize
    optimizer.maxEvaluations = max_evaluations
    optimizer.minimize = True
    for generation in xrange(max_evaluations / batch_size):
        learned = optimizer.learn(additionalLearningSteps=1)[0]
        testNetwork(data, n_classes, learned, buildNet,
                    num_samples * (generation + 1) * batch_size, file, seed)
    return learned
def evaulateGame(without=0, iteration=20): global zelda_game, now_zelda_game, rules #buildNetWork net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer) from pybrain.optimization import SNES from pybrain.optimization import OriginalNES from pybrain.optimization import GA from numpy import ndarray best = 0 #SNES algo = SNES(lambda x: evaluate(x, iteration=iteration, without=without), net, verbose=True) episodesPerStep = 5 for i in range(4): algo.learn(episodesPerStep) print net.params if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable if algo.bestEvaluation > best: best = algo.bestEvaluation """ if best < 0.1: #too hard return -1,net """ #Standard NES """ net = buildNetwork(108,10,8,hiddenclass=SigmoidLayer) algo = OriginalNES(lambda x: evaluate(x), net, verbose=True) episodesPerStep = 2 for i in range(5): algo.learn(episodesPerStep) print net.params if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable g = open(os.path.dirname(os.path.realpath(__file__))+"/stats"+str(threadnumber)+".txt", 'a+') print >> g,str(net.params) + '\n' + "Standard NES:" + str(algo.bestEvaluation) + '\n' g.close() if algo.bestEvaluation > best: best = algo.bestEvaluation """ print now_zelda_game return best, net
def get_population_size(learned, cmaes):
    """Return the per-generation population size (batchSize) the optimizer
    would pick for this parameter vector.

    Instantiates a throwaway CMAES (when `cmaes` is truthy) or SNES with a
    dummy objective purely to read its auto-chosen batchSize.
    """
    optimizer_cls = CMAES if cmaes else SNES
    probe = optimizer_cls(lambda x: None, learned, verbose=False)
    return probe.batchSize
def configure_snes(objF, start_params, minimize=False):
    """Set up an SNES optimizer for objF starting at start_params.

    Returns:
        (run_snes, batch_size): run_snes is a generator function that performs
        one learning step per iteration, updates the global `generation`
        counter, and yields the current best evaluable; batch_size is the
        optimizer's population size.
    """
    optimizer = SNES(objF, start_params, verbose=False)
    optimizer.minimize = minimize

    def run_snes():
        global generation
        generation = 0
        while True:
            generation += 1
            yield optimizer.learn(additionalLearningSteps=1)[0]

    return run_snes, optimizer.batchSize
def test3():
    """Train a small feed-forward controller on the consistent corridor maze
    with SNES, plotting trajectories as training progresses.

    Fix: dropped the unused `office_layout_2` import.
    """
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES
    g = VGDLParser().parseGame(polarmaze_game)
    g.buildLevel(consistent_corridor)
    game_env = GameEnvironment(g)
    net = buildNet(game_env.outdim, 4, 4, temperature=0.05, recurrent=False)
    algo = SNES(lambda x: someEpisodes(game_env, x), net,
                verbose=True, desiredEvaluation=0.78)
    rows, cols = 2, 2
    episodesPerStep = 3
    for i in range(rows * cols):
        pylab.subplot(rows, cols, i + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str((i + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def test2():
    """Train a controller on maze level 1 with SNES (episodes recorded),
    plotting the best trajectory after each batch of learning episodes."""
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES
    game_str, map_str = polarmaze_game, maze_level_1
    parsed = VGDLParser().parseGame(game_str)
    parsed.buildLevel(map_str)
    env = GameEnvironment(parsed, actionDelay=100, recordingEnabled=True)
    net = buildNet(env.outdim, 6, 2)
    algo = SNES(lambda w: someEpisodes(env, w), net,
                verbose=True, desiredEvaluation=0.43)
    rows, cols = 3, 3
    episodesPerStep = 2
    for subplot_idx in range(rows * cols):
        pylab.subplot(rows, cols, subplot_idx + 1)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(env)
        plotTrajectories(env, net)
        pylab.title(str((subplot_idx + 1) * episodesPerStep))
        if algo.desiredEvaluation <= algo.bestEvaluation:
            break
    print
    pylab.show()
def train_network(X_train, y_train, X_validate, y_validate, X_test, y_test,
                  test_split=0, validate_split=0):
    """Train a net with SNES (or CMAES when the global `cmaes` flag is set),
    minimizing log-loss on stratified mini-batches of the training data, and
    logging train/val/test performance via test_network every generation.

    Relies on the enclosing scope's globals: seed, n_classes, cmaes,
    batch_size, max_evaluations, buildNet, and output file handle f.
    """
    file_start = "%d\t%d\t%d" % (seed, test_split, validate_split)
    n = buildNet(X_train.shape[1], n_classes)
    learned = n.params
    population_size = get_population_size(learned, cmaes)
    evaluations_per_generation = population_size * batch_size
    num_generations = max_evaluations / (evaluations_per_generation) + 1
    # Pre-draw one stratified batch per generation so every candidate within a
    # generation is scored on the same class-balanced sample.
    from sklearn.cross_validation import StratifiedShuffleSplit
    sss = StratifiedShuffleSplit(y_train.reshape(-1), num_generations,
                                 train_size=batch_size, random_state=seed)
    train_indices = [batch_index for (batch_index, _) in sss]

    def objF(params):
        # Score one candidate: fresh net, this generation's batch, log-loss.
        nn = buildNet(X_train.shape[1], n_classes)
        nn._setParameters(np.array(params))
        cur_data = X_train[train_indices[l.numLearningSteps]]
        cur_label = y_train[train_indices[l.numLearningSteps]]
        results = []
        for example, cor in zip(cur_data, cur_label):
            results.append(nn.activate(example))
            nn.reset()
        return log_loss(cur_label, results)

    # Baseline scores for the untrained parameters.
    test_network(X_validate, y_validate, learned, 0, file_start, "val")
    test_network(X_test, y_test, learned, 0, file_start, "test")
    l = SNES(objF, learned, verbose=False)
    if cmaes:
        l = CMAES(objF, learned, verbose=False)
    l.minimize = True
    l.maxEvaluations = num_generations * population_size
    for generation in xrange(num_generations):
        learned = l.learn(additionalLearningSteps=1)[0]
        train_evaluations = (generation + 1) * evaluations_per_generation
        test_network(X_train[train_indices[generation]],
                     y_train[train_indices[generation]],
                     learned, train_evaluations, file_start, "train")
        test_network(X_validate, y_validate, learned, train_evaluations,
                     file_start, "val")
        test_network(X_test, y_test, learned, train_evaluations,
                     file_start, "test")
        if generation % 100 == 0:
            f.flush()
# NOTE(review): top-level training fragment. The original whitespace was
# collapsed, so the statement grouping cannot be recovered with confidence --
# in particular whether the `nowgame = template ...` tail belongs inside the
# `for i in range(99999)` loop body (likely, since the loop has no break) or
# after it. The fragment also appears to continue past this chunk. Left
# byte-identical pending access to the intact file.
rules[57] = 0.8 setRule(rules) #certainGame #start certainInitial() net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer) from pybrain.optimization import SNES from pybrain.optimization import OriginalNES from pybrain.optimization import GA from numpy import ndarray best = 0 print "SNES starting......" algo = SNES(lambda x: evaluate(x), net, verbose=True) #algo = GA(lambda x: evaluate(x), net, verbose=True) #algo = OriginalNES(lambda x: evaluate(x), net, verbose=True, desiredEvaluation=0.85) episodesPerStep = 10 for i in range(99999): algo.learn(episodesPerStep) print net.params if isinstance(algo.bestEvaluable, ndarray): net._setParameters(algo.bestEvaluable) else: net = algo.bestEvaluable if algo.bestEvaluation > best: best = algo.bestEvaluation nowgame = template nowgame = nowgame.replace('{red}', str(red)) nowgame = nowgame.replace('{blue}', str(blue))