Example #1
def main():

    # initialize the trainer; the actions are the agent's predictions:
    # action 1 -> right box will fall off the edge
    # action 0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    _min, _max, _avg = [], [], []  # hold values for every generation

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # new list per gen

        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen

            # evaluating the agent
            score = 0
            for i in range(EVALUATIONS):
                score += evaluateAgent(agent)
            agent.reward(score)

            curScores.append(score)

        print("Min:", min(curScores), "   Max:", max(curScores), "   Avg:",
              sum(curScores) / len(curScores),
              "(out of " + str(EVALUATIONS) + ")\n")

        _min.append(min(curScores))
        _max.append(max(curScores))
        _avg.append(sum(curScores) / len(curScores))

        trainer.evolve()

    # getting best agent after all the generations
    best_agent, best_score = getBestAgent(trainer)

    print("Best agent's score:", best_score, "/", EVALUATIONS)

    for run in range(FINAL_RUNS):
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # plotting progress over the generations
    generations = range(1, GENERATIONS + 1)

    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])

    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")

    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
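
This excerpt leans on names defined elsewhere in its file: the GENERATIONS, EVALUATIONS, and FINAL_RUNS constants and the evaluateAgent and getBestAgent helpers. A minimal sketch of what they might look like, assuming a Gym-style environment and the agent.act/agent.reward interface used throughout these examples; every name and value below is an assumption, not part of the original:

import gym

# hypothetical stand-ins; none of this appears in the excerpt above
GENERATIONS = 100
EVALUATIONS = 10
FINAL_RUNS = 5
env = gym.make('CartPole-v1')  # stand-in for the example's box environment

def evaluateAgent(agent, graphics=False):
    # play one episode; assumed scoring: 1 for a successful episode, else 0
    state = env.reset()
    reward, isDone = 0, False
    while not isDone:
        if graphics:
            env.render()
        state, reward, isDone, _ = env.step(agent.act(state))
    return 1 if reward > 0 else 0

def getBestAgent(trainer):
    # re-evaluate every agent and keep the highest scorer (assumed logic)
    bestAgent, bestScore = None, float('-inf')
    while True:
        agent = trainer.getNextAgent()
        if agent is None:
            break
        score = sum(evaluateAgent(agent) for _ in range(EVALUATIONS))
        if score > bestScore:
            bestAgent, bestScore = agent, score
    return bestAgent, bestScore
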
Example #2

def runTpg():

    tmpEnv = gym.make(options.envName)
    # create TPG
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    # train until the configured number of generations is reached
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent, [(agent, options.envName, scoreList,
                             options.trainEps, options.trainFrames, None)
                            for agent in trainer.getAllAgents(
                                skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after all agents have been scored
        trainer.evolve(tasks=[options.envName], elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + options.envName +
                ',' + str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')
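
runTpg evaluates the population in parallel: pool, man, tstart, and logFileTpg are created elsewhere in the file (a multiprocessing.Pool and Manager, a start timestamp, and a log path). A plausible runAgent worker, with the argument order taken from the tuples above; the body is an assumption modeled on the usual PyTPG multiprocessing pattern:

import gym

def runAgent(args):
    # unpack the work tuple built at the call site (order assumed from above)
    agent, envName, scoreList, numEpisodes, numFrames, _ = args

    env = gym.make(envName)
    scoreTotal = 0
    for _ in range(numEpisodes):
        state = env.reset()
        score = 0
        for _ in range(numFrames):
            act = agent.act(state)  # assumed: raw observation fed to the agent
            state, reward, isDone, _ = env.step(act)
            score += reward
            if isDone:
                break
        scoreTotal += score
    env.close()

    # record the mean episode score against this task (bookkeeping assumed)
    agent.reward(scoreTotal / numEpisodes, envName)
    scoreList.append((agent.team.id, agent.team.outcomes))
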
Example #3
    # dispatch all agents through the worker pool (call head assumed; it
    # matches the pool.map pattern in the other examples)
    pool.map(
        runAgent,
        [
            (agent, game, scoreQueue)  # (agent, envQueue, scoreQueue)
            for agent in trainer.getAllAgents(skipTasks=[])
        ])

    scores = []  # drain the score queue into a plain list
    while not scoreQueue.empty():
        scores.append(scoreQueue.get())

    # save model before every evolve in case of an issue
    with open('gvgai-model-1be.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # apply scores
    trainer.applyScores(scores)
    trainer.evolve()  # go into next gen

    # save model after every gen
    with open('gvgai-model-1ae.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((trainer.scoreStats['min'], trainer.scoreStats['max'],
                          trainer.scoreStats['average']))  # min, max, avg
    print(chr(27) + "[2J")  # ANSI escape: clear the terminal
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

print(chr(27) + "[2J")
print('Time Taken (Seconds): ' + str(time.time() - tStart))
print('Results: ' + str(summaryScores))
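
The trainer is pickled both before ('-1be') and after ('-1ae') each evolve, so a crash during evolution costs at most one generation. Resuming later is just a matter of unpickling the checkpoint:

import pickle

# resume from the post-evolve checkpoint written above
with open('gvgai-model-1ae.pkl', 'rb') as f:
    trainer = pickle.load(f)
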
Example #4
                act = agent.act(getState(state),
                                valActs=valActs)  # get action from agent

                # feedback from env
                state, reward, isDone, debug = env.step(act)
                score += reward  # accumulate reward in score
                if isDone:
                    break  # end the episode early on a terminal state

            agent.reward(score)  # must reward the agent even if it never scored

        print('Agent #' + str(agent.getAgentNum()) + ' finished with score ' +
              str(score))
        curScores.append(score)  # store score

    trainer.evolve()

    # save model after every gen
    with open('gvgai-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((min(curScores), max(curScores),
                          sum(curScores) / len(curScores)))  # min, max, avg

    print(chr(27) + "[2J")
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

#clear_output(wait=True)
print('Time Taken (Seconds): ' + str(time.time() - tStart))
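
getState is not shown in this excerpt; in TPG examples on screen-based games it typically flattens the raw observation into the one-dimensional integer vector that TPG programs index. A sketch under that assumption:

import numpy as np

def getState(obs):
    # assumed helper: collapse the (possibly multi-channel) observation into
    # the flat integer feature vector that TPG programs index into
    return np.array(obs, dtype=np.int32).flatten()
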
Example #5
        # run all agents on env
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
        with open(logName, 'a') as f:
            f.write(
                str(trainer.populations[envName].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' +
                str(scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + '\n')

        # do evolution on each env played
        trainer.evolve(fitMethod='single',
                       tasks=[envName],
                       elitistTasks=[envName],
                       popName=envName)

    # save the most recent model
    with open('trainer-8-pops.pkl', 'wb') as f:
        pickle.dump(trainer, f)
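
Unlike the earlier single-population runs, this excerpt keeps a separate population per environment (popName=envName), so each game evolves against its own task scores. After training, the pickled trainer can be reloaded and inspected with the same calls the excerpt uses; a sketch, assuming the population keys are the environment names:

import pickle

with open('trainer-8-pops.pkl', 'rb') as f:
    trainer = pickle.load(f)

# print the best team per environment-named population (keys assumed)
for envName in trainer.populations:
    bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
    print(envName, bestTeam.uid)
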
Example #6
        frames = 1000
    elif curGen == 25:
        frames = 5000
    elif curGen == 50:
        frames = 18000
    pool.map(runAgent,
             [(agent, envName, scoreList, 1, frames) for agent in agents])

    # apply scores
    trainer.applyScores(scoreList)

    tasks = [envName + '-' + str(frames)]
    scoreStats = trainer.generateScoreStats(tasks=tasks)
    allScores.append(
        (envName, scoreStats['min'], scoreStats['max'], scoreStats['average']))
    trainer.evolve(tasks=tasks, fitShare=False)  # go into next gen

    # save model after every gen
    with open('saved-model-sgp.pkl', 'wb') as f:
        pickle.dump(trainer, f)
    # save best agent after every gen
    #with open('best-agent-sgp.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgents(tasks=tasks)[0],f)

    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('On Generation: ' + str(curGen))
    #print('Results: ', str(allScores))

    with open(logFileName, 'a') as f:
        f.write(
            str(curGen) + ' | ' + str(envName) + ' | ' +
            # remainder assumed: mirrors the min/max/average logging above
            str(scoreStats['min']) + ' | ' + str(scoreStats['max']) + ' | ' +
            str(scoreStats['average']) + '\n')

allScores = []  # track all scores each generation

tStart = time.time()

logFileName = 'train-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

while True:  # run generations indefinitely
    scoreList = man.list()

    pool.map(runAgent, [(agent, scoreList, 0.05)
                        for agent in trainer.getAllAgents(noRef=True)])

    # apply scores
    trainer.applyScores(scoreList)
    trainer.evolve(fitShare=False, tasks=[])
    scoreStats = trainer.scoreStats
    allScores.append((trainer.curGen, scoreStats['min'], scoreStats['max'],
                      scoreStats['average']))

    # save model after every gen
    with open('saved-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # save best agent each generation
    #with open('best-agent.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgent(tasks=[]), f)

    print(chr(27) + "[2J")
    print('Time So Far (Seconds): ' + str(time.time() - tStart))
    print('Results: ', str(allScores))
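
Since allScores accumulates (generation, min, max, average) tuples, progress can be plotted the same way Example #1 does; a sketch:

import matplotlib.pyplot as plt

gens, mins, maxs, avgs = zip(*allScores)  # unpack (gen, min, max, avg)
plt.plot(gens, mins, label='min')
plt.plot(gens, maxs, label='max')
plt.plot(gens, avgs, label='avg')
plt.xlabel('generation')
plt.ylabel('score')
plt.legend()
plt.show()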