Exemplo n.º 1
0
def evolve_atari_network(game, input_size):
    """Evolve a network to play `game` and persist the best one found.

    Runs a fixed number of generations.  In each generation every member
    of the population (``ne.populationSize`` of them) is played through a
    fresh ``Simulator(game)`` episode; its fitness is the sum of
    ``sim.reward`` over the episode and is reported back via
    ``ne.evaluate``.

    Side effects:
      * ``nets/<game>.net``   -- pickle of the network with the highest
        fitness seen so far (overwritten on every improvement).
      * ``nets/<game>.curve`` -- the best fitness of each generation,
        appended as comma-terminated values.
      * progress lines printed to stdout.

    Args:
        game: game identifier, passed to ``Simulator`` and used in the
            output file names.
        input_size: passed to ``NE.GSP``; the simulator's object array is
            reshaped to ``(input_size * 80, 1)`` before being fed to the
            network.

    Returns:
        None.
    """
    # NOTE(review): 18 is presumably the number of network outputs (one per
    # action, matching the ',18' suffix written to the simulator) -- confirm
    # against NE.GSP's signature.
    ne = NE.GSP(input_size, 18, 2, [], True)

    NUM_GENERATIONS = 1000

    best_fitness = -10000000

    for generation in range(NUM_GENERATIONS):
        curr_best_fitness = -10000000

        # test each subpopulation
        for i in range(ne.populationSize):
            sim = Simulator(game)
            try:
                currnet = ne.testNet(i)
                fitness = 0

                while sim.running():
                    sim.read()
                    fitness += sim.reward

                    currnet.clearCharges()
                    currnet.setInputs(
                        sim.objects.reshape(input_size * 80, 1))
                    currnet.activate()
                    output = currnet.readOutputs()

                    # The index of the strongest output is sent as the action.
                    sim.write('{},18\n'.format(np.argmax(output)))

                ne.evaluate(fitness, i)

                if fitness > curr_best_fitness:
                    curr_best_fitness = fitness

                if fitness > best_fitness:
                    best_fitness = fitness
                    # Use a context manager so the file is flushed and closed
                    # immediately instead of whenever the handle is collected.
                    with open('nets/{}.net'.format(game), 'wb') as net_file:
                        pickle.dump(currnet, net_file)

                print("gen: {}\ti: {}\trew: {}\tbest: {}\t end: {}"
                      .format(generation, i, fitness,
                              best_fitness, sim.terminated))
            finally:
                # Always release the simulator, even if evaluation raised.
                sim.kill()

        with open('nets/{}.curve'.format(game), 'a') as curve:
            curve.write(str(curr_best_fitness) + ',')

        print("Gen " + str(generation) + ", Best: " + str(curr_best_fitness))
        ne.nextGen()

    print("Generation " + str(NUM_GENERATIONS) + ", task complete.")
Exemplo n.º 2
0
def _decode_action(output):
    """Translate a network output vector into an action index.

    With 18 outputs the action is simply the index of the largest output.
    Otherwise the compressed representation is assumed: ``output[0]`` is
    "fire" and ``output[1:]`` are the directions (the first of which is
    the no-op direction).
    """
    if len(output) == 18:
        return np.argmax(output)

    # compressed repr: output0 is fire; output1-9 are dirs
    # relies on output functions with range centered around 0.5, e.g., [0,1]
    action = np.argmax(output[1:]) + 1
    if action != 1:
        # add 8 if (fire) and (dir not noop)
        if output[0] >= 0.5:
            action += 8
    else:
        # subtract 1 if (not fire) and (dir noop)
        if output[0] < 0.5:
            action -= 1
    return action


def run_game(game,
             net,
             result_file,
             skip_num_frames=0,
             max_num_frames=50000,
             max_secs_without_reward=50000,
             drop_rate=0,
             num_evals=1,
             display_screen=False):
    """Evaluate a pickled network on `game` and write its average fitness.

    The network is played for ``num_evals`` episodes; each episode's
    fitness is the sum of ``sim.reward``.  The average over all episodes
    is written to ``result_file`` (via a temporary file that is then
    moved into place).

    Args:
        game: game identifier passed to ``Simulator``.
        net: path to a pickled network.  NOTE: unpickling executes
            arbitrary code -- only load files from a trusted source.
        result_file: destination path for the average fitness.
        skip_num_frames, max_num_frames, max_secs_without_reward:
            forwarded unchanged to ``Simulator``.
        drop_rate: probability in [0, 1] that the network is NOT
            activated on a frame, in which case the previous action is
            sent again.
        num_evals: number of episodes to average over (must be non-zero).
        display_screen: forwarded to ``Simulator``.

    Returns:
        None.
    """
    # Pickles are binary data: open in 'rb' and close the handle promptly.
    with open(net, 'rb') as net_file:
        currnet = cPickle.load(net_file)

    total_fitness = 0
    for _ in range(num_evals):
        currnet.clearCharges()
        # Fix: the original passed the undefined name `display` here.
        sim = Simulator(game,
                        currnet.numInput,
                        skip_num_frames,
                        max_num_frames,
                        max_secs_without_reward,
                        display_screen)
        fitness = 0
        action = 0
        while sim.running():
            success = sim.read()
            if not success:
                break
            if sim.reward != 0:
                print(sim.reward)
            fitness += sim.reward

            # activate if signal not dropped; otherwise the last action is
            # repeated.  Charges are deliberately not cleared between frames.
            if np.random.random() >= drop_rate:
                currnet.setInputs(sim.objects)
                currnet.activate()
                action = _decode_action(currnet.readOutputs())
            sim.write('{},18\n'.format(action))

        total_fitness += fitness
        print("TOTAL FITNESS: " + str(total_fitness))

    avg_fitness = float(total_fitness) / num_evals

    # NamedTemporaryFile creates the file atomically, avoiding the race
    # inherent in tempfile.mktemp().  Note the file is created with
    # owner-only permissions, which carry over to result_file on move.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp:
        tmp.write('{}'.format(avg_fitness))

    shutil.move(tmp.name, result_file)