Example #1
import pickle

import pygame

# MarioTask, EpisodicExperiment, HumanAgent and ForwardAgent are assumed to
# come from the surrounding project; parse_arguments() is sketched below.


def main():
    args = parse_arguments()
    agent_name = args.agent_name
    filename = args.filename
    num_epi = args.num_epi
    print(agent_name, filename, num_epi)
    pygame.init()
    pygame.display.set_mode([1, 1])
    task = MarioTask(initMarioMode=2)
    if agent_name == 'human':
        agent = HumanAgent(task.ACTION_MAPPING)
    else:
        agent = ForwardAgent()

    exp = EpisodicExperiment(task, agent)
    print('Task Ready')
    exp.train(num_epi)
    print('mm 2:', task.reward)

    if agent_name == 'human':
        print(exp.all_actions)  # the recorded expert actions
        with open('./expert_data/' + filename + '_demo.pckl', 'wb') as f:
            pickle.dump((exp.all_states, exp.all_actions), f)
    print("finished")
    pygame.quit()
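
The parse_arguments() helper is not included in the snippet. A minimal argparse sketch, reconstructed (hypothetically) from the three attributes the example reads:

import argparse


def parse_arguments():
    # Hypothetical reconstruction: only the attributes Example #1 accesses
    # are defined here; the original repo's version may differ.
    parser = argparse.ArgumentParser()
    parser.add_argument('agent_name',
                        help="'human' for HumanAgent, anything else for ForwardAgent")
    parser.add_argument('filename',
                        help='basename for the demo pickle written to ./expert_data/')
    parser.add_argument('num_epi', type=int, help='number of episodes to run')
    return parser.parse_args()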
Example #2
import os
import sys


def main():
    agent = MyAgent(None)
    #agent = AstarAgent()
    task = MarioTask(agent.name)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = int(sys.argv[1]) if len(sys.argv) == 2 else 0
    experiment = EpisodicExperiment(task, agent)

    n_individuals = 10
    filename = "learned_individuals_{0}".format(task.env.levelDifficulty)
    if os.path.exists(filename):
        initial_individuals = load(filename)
    else:
        initial_individuals = [
            Individual(random=True) for _ in range(n_individuals)
        ]
    current_individuals = initial_individuals
    n_generations = 50
    for generation in range(n_generations):
        print("generation #{0} playing...".format(generation))
        #task.env.visualization = generation % 10 == 0
        task.env.visualization = generation % 50 == 0
        current_individuals = make_next_generation(experiment,
                                                   current_individuals,
                                                   generation)
        save(current_individuals, filename)
        savelog(log_list)  # log_list: module-level log accumulator, not shown in the snippet
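
The save and load helpers come from the same repo and are not shown. Since load(filename) must round-trip whatever save(current_individuals, filename) wrote, a plausible pickle-based sketch (hypothetical, not the repo's actual code):

import pickle


def save(obj, filename):
    # Hypothetical helper: persist the current population to disk.
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)


def load(filename):
    # Hypothetical helper: restore a previously saved population.
    with open(filename, 'rb') as f:
        return pickle.load(f)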
Example #3
def main():
    f = open('try_3.txt', 'w')  # note: this handle is later shadowed by the `with open('type.txt')` block
    g = open('accs.txt', 'w')  # opening and immediately closing truncates accs.txt
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    with open('type.txt', 'w') as f:
        f.write('dt')

    iterations = 10
    rounds = 5
    learning_samples = 2
    eval_samples = 5

    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    E = Evaluator(agent, exp)

    if args['noisy']:
        prefix = 'dt-noisy-sup-eval'
    else:
        prefix = 'dt-sup-eval'

    sl_data, sup_data, acc, loss = E.eval(rounds=rounds,
                                          iterations=iterations,
                                          learning_samples=learning_samples,
                                          eval_samples=eval_samples,
                                          prefix=prefix)

    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)
    np.save('./data/' + prefix + '-loss.npy', loss)

    analysis = Analysis()
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervised Learning'],
                  label='Reward',
                  filename='./results/' + prefix +
                  '-return_plots.eps')  #, ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'],
               label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps')

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['Supervised Loss'],
                label='Loss',
                filename='./results/' + prefix + '-loss_plots.eps')

    #agent.saveModel()
    print "finished"
Example #4
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    iterations = 50
    rounds = 15

    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    sl_data, sup_data, acc = T.test(rounds=rounds, iterations=iterations)

    np.save('./data/sup_data.npy', sup_data)
    np.save('./data/sl_data.npy', sl_data)
    np.save('./data/acc.npy', acc)

    IPython.embed()  # drop into an interactive shell to inspect the arrays

    analysis = Analysis()
    analysis.get_perf(sup_data, range(iterations))
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervisor', 'Supervised Learning'],
                  label='Reward',
                  filename='./results/return_plots.eps')  #, ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'],
               label='Accuracy',
               filename='./results/acc_plots.eps')

    print "finished"
Example #5
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1
    task.env.BASE_LEVEL = 500000

    results = []
    names = []

    iterations = 20
    rounds = 30
    learning_samples = 33
    eval_samples = 10

    # iterations = 5
    # rounds = 2
    # learning_samples = 3
    # eval_samples = 2

    if args['noisy']:
        agent = NoisySupervise(IT, useKMM=False)
        dire = './training_data_noisy/'
    else:
        agent = Supervise(IT, useKMM=False)
        dire = './training_data/'

    exp = EpisodicExperiment(task, agent)
    C = Collector(agent, exp)
    C.collect(rounds=rounds,
              iterations=iterations,
              learning_samples=learning_samples,
              eval_samples=eval_samples,
              directory=dire)

    print "finished"
Example #6
def main():
    filename = "learned_individuals_{0}".format(0)
    rand_individual = Individual(data=filename, random=True)
    agent = MyAgent(rand_individual)
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print('Task Ready')
    exp.doEpisodes(2)
    print('mm 2:', task.reward)

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print('mm 1:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 0, ld 5: ', task.reward)

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 1, ld 5: ', task.reward)

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 2, ld 5: ', task.reward)

    print("finished")
Example #7
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = [] 
    names = [] 

    with open('type.txt', 'w') as f:
        f.write('ent')
    
    # # #test dagger
    # iterations = 1
    # rounds = 1
    
    iterations = 50
    rounds = 15
    #agent = Dagger(IT,useKMM = False)
    #exp = EpisodicExperiment(task, agent) 
    #T = Tester(agent,exp)
    #dagger_results = T.test(rounds = rounds,iterations = iterations)
    #dagger_data = dagger_results[-1]
    #dagger_results = dagger_results[:-1]
    #results.append(dagger_results)
    #names.append('dagger')
    #pickle.dump(results,open('results.p','wb'))

    #agent = Dagger(IT, useKMM=False)
    #exp = EpisodicExperiment(task, agent)
    #T = Tester(agent, exp)
    #dagger_data, _, acc = T.test(rounds = rounds, iterations = iterations)
    
    agent = Supervise(IT, useKMM=False)
    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    prefix = 'dt-noisy-sup-change-entropy'
    sl_data, sup_data, acc = T.test(rounds=rounds,
                                    iterations=iterations,
                                    prefix=prefix)

    np.save('./data/' + prefix + '-sup_data.npy', sup_data)
    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)

    # IPython.embed()

    analysis = Analysis()
    analysis.get_perf(sup_data, range(iterations))
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervisor', 'Supervised Learning'],
                  label='Reward',
                  filename='./results/' + prefix + '-return_plots.eps')  #, ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'],
               label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps')

    """


    agent = Dagger(IT,useKMM = False)
    exp = EpisodicExperiment(task, agent) 
    T = Tester(agent,exp)
    dagger_data, _, acc = T.test(rounds = rounds, iterations = iterations)

    np.save('./data/dagger_data.npy', dagger_data)
    np.save('./data/acc.npy', acc)    
    
    IPython.embed()

    analysis = Analysis()
    analysis.get_perf(dagger_data, range(iterations))
    analysis.plot(names=['DAgger'], label='Reward', filename='./results/return_plots.eps')

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['DAgger Acc.'], label='Accuracy', filename='./results/acc_plots.eps')

    """
    
    #agent = Supervise(IT,useKMM = False)
    #exp = EpisodicExperiment(task, agent) 
    #T = Tester(agent,exp)
    #supervise_results = T.test(rounds = rounds, iterations = iterations)
    #supervise_data = supervise_results[-1]
    #supervise_results = supervise_results[:-1]
    #results.append(supervise_results)
    #names.append('supervise')
    #pickle.dump(results,open('results.p','wb'))

    #IPython.embed()

    #analysis = Analysis()
    #analysis.get_perf(supervise_data, results[1][5])
    #analysis.get_perf(dagger_data, results[0][5])
    #analysis.plot(names=['Supervise', 'DAgger'], label='Reward', filename='./return_plot.eps')#, ylims=[-1, 0])




    # agent = Sheath(IT,useKMM = False,sigma = 1.0)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # dagger_results = T.test(rounds = 10,iterations = 35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # pickle.dump(results,open('results.p','wb'))

    # agent = Sheath(IT,useKMM = False,sigma = 1e-1)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # dagger_results = T.test(rounds = 10,iterations = 35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # pickle.dump(results,open('results.p','wb'))


    
    # agent = Sheath(IT,useKMM = False,sigma = 0.5)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # dagger_results = T.test(rounds = 10,iterations = 35)
    # results.append(dagger_results)
    # names.append('sheath_1')

    # pickle.dump(results,open('results.p','wb'))
    # agent = Sheath(IT,useKMM = False,sigma = 1e-1)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # dagger_results = T.test(rounds = 4,iterations = 35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    

    # agent = Sheath(IT,useKMM = False,sigma = 1e-2)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # dagger_results = T.test(rounds = 4,iterations = 35)
    # results.append(dagger_results)
    # names.append('sheath_1')
    # # # # # #test big ahude
    # agent = Ahude(IT,f,gamma = 1e-2,labelState = True, useKMM = True)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # ahude_big_results = T.test(rounds = 3)
    # results.append(ahude_big_results)
    # names.append('ahude_1e-1')

    # pickle.dump(results,open('results.p','wb'))


    # # # # # #test med ahude
    # agent = Ahude(IT,f,gamma = 1e-2,labelState = False,useKMM = True)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # ahude_med_results = T.test(rounds = 3)
    # results.append(ahude_med_results)
    # names.append('ahude_1e-2')
    
    # # #

    # # # # # # #test small ahude 
    # agent = Ahude(IT,f,gamma = 1e-3)
    # exp = EpisodicExperiment(task, agent) 
    # T = Tester(agent,exp)
    # ahude_small_results = T.test() 
    # results.append(ahude_small_results)
    # names.append('ahude_1e-3')
    
 
    # pickle.dump(results,open('results.p','wb'))

    #plt.figure(1)
    #for i in range(len(results)):
    #    plt.plot(results[i][5],results[i][1])
    
    
    #plt.legend(names,loc='upper left')

    # plt.figure(2)
    # for i in range(len(results)):
    #     plt.plot(results[i][0])

    # plt.legend(names,loc='upper left')

    # plt.figure(3)
    # for i in range(0,len(results)):
    #     plt.plot(results[i][3])

    # plt.legend(names,loc='upper left')


    plt.show()

    # IPython.embed()
    f.close()

    #agent.saveModel()
    print("finished")
Example #8
import sys


def main():
    clo = CmdLineOptions(sys.argv)
    task = MarioTask(MarioEnvironment(clo.getHost(), clo.getPort(),
                                      clo.getAgent().name))
    exp = EpisodicExperiment(clo.getAgent(), task)
    exp.doEpisodes(3)
Example #9
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    iterations = 5
    rounds = 2
    learning_samples = 2
    eval_samples = 3

    if args['noisy']:
        prefix = '-noisy-sup-eval'
        dire = './training_data_noisy/'
        agent = NoisySupervise(IT, useKMM=False)
    else:
        prefix = '-sup-eval'
        dire = './training_data/'
        agent = Supervise(IT, useKMM=False)

    if args['linear']:
        agent.learner.linear = True
        prefix = 'svc' + prefix
    else:
        agent.learner.linear = False
        prefix = 'dt' + prefix

    exp = EpisodicExperiment(task, agent)
    E = Evaluator(agent, exp)
    sl_data, sup_data, acc, loss, js = E.eval(rounds=rounds,
                                              iterations=iterations,
                                              learning_samples=learning_samples,
                                              eval_samples=eval_samples,
                                              prefix=prefix,
                                              directory=dire)

    np.save('./data/' + prefix + '-sl_data.npy', sl_data)
    np.save('./data/' + prefix + '-acc.npy', acc)
    np.save('./data/' + prefix + '-loss.npy', loss)
    np.save('./data/' + prefix + '-js.npy', js)

    analysis = Analysis()
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervised Learning'],
                  label='Reward',
                  filename='./results/' + prefix + '-return_plots.eps')  #, ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'],
               label='Accuracy',
               filename='./results/' + prefix + '-acc_plots.eps',
               ylims=[0, 1])

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['Supervised Learning loss'],
                label='Loss',
                filename='./results/' + prefix + '-loss_plots.eps',
                ylims=[0, 1])

    js_a = Analysis()
    js_a.get_perf(js, range(iterations))
    js_a.plot(names=['Supervised Learning'],
              label='J()',
              filename='./results/' + prefix + '-js_plots.eps')

    #agent.saveModel()
    print("finished")
Example #10
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print('Task Ready')
    exp.doEpisodes(2)
    print('mm 2:', task.reward)

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print('mm 1:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 0, ld 5: ', task.reward)

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 1, ld 5: ', task.reward)

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 2, ld 5: ', task.reward)

    print("finished")
Example #11
    IPython.embed()

    analysis = Analysis()
    analysis.get_perf(sup_data, range(iterations))
    analysis.get_perf(sl_data, range(iterations))
    analysis.plot(names=['Supervisor', 'Supervised Learning'],
                  label='Reward',
                  filename='./results/return_plots.eps')  #, ylims=[0, 1600])

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['Supervised Learning Acc.'],
               label='Accuracy',
               filename='./results/acc_plots.eps')

    """


    agent = Dagger(IT,useKMM = False)
    exp = EpisodicExperiment(task, agent) 
    T = Tester(agent,exp)
    prefix = 'svc-dagger-change'
    dagger_data, _, acc = T.test(rounds = rounds, iterations = iterations, prefix = prefix)

    np.save('./data/svc-dagger-change-dagger_data.npy', dagger_data)
    np.save('./data/svc-dagger-change-acc.npy', acc)    
    
    # IPython.embed()

    analysis = Analysis()
    analysis.get_perf(dagger_data, range(iterations))
    analysis.plot(names=['DAgger'], label='Reward', filename='./results/svc-dagger-change-return_plots.eps')

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
Example #12
def main():
    f = open('try_3.txt', 'w')
    g = open('accs.txt', 'w')
    g.close()
    task = MarioTask("testbed", initMarioMode=2)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1

    results = []
    names = []

    with open('type.txt', 'w') as f:
        f.write('dt')

    iterations = 20
    rounds = 30
    learning_samples = 33
    eval_samples = 10

    # iterations = 5
    # rounds = 2
    # learning_samples = 3
    # eval_samples = 2

    agent = Dagger(IT, useKMM=False)

    if args['linear']:
        agent.learner.linear = True
        prefix = 'svc-dagger-change-'
    else:
        agent.learner.linear = False
        prefix = 'dt-dagger-change-'

    exp = EpisodicExperiment(task, agent)
    T = Tester(agent, exp)
    dagger_data, _, acc, loss, js, test_acc = T.test(
        rounds=rounds,
        iterations=iterations,
        learning_samples=learning_samples,
        eval_samples=eval_samples,
        prefix=prefix)

    np.save('./data/' + prefix + 'dagger_data.npy', dagger_data)
    np.save('./data/' + prefix + 'acc.npy', acc)
    np.save('./data/' + prefix + 'loss.npy', loss)
    np.save('./data/' + prefix + 'js.npy', js)
    np.save('./data/' + prefix + 'test_acc.npy', test_acc)

    analysis = Analysis()
    analysis.get_perf(dagger_data, range(iterations))
    analysis.plot(names=['DAgger'],
                  label='Reward',
                  filename='./results/' + prefix + 'return_plots.eps')

    acc_a = Analysis()
    acc_a.get_perf(acc, range(iterations))
    acc_a.plot(names=['DAgger Acc.'],
               label='Accuracy',
               filename='./results/' + prefix + 'acc_plots.eps',
               ylims=[0, 1])

    test_acc_a = Analysis()
    test_acc_a.get_perf(test_acc, range(iterations))
    test_acc_a.plot(names=['DAgger Acc.'],
                    label='Test Accuracy',
                    filename='./results/' + prefix + 'test_acc_plots.eps',
                    ylims=[0, 1])

    loss_a = Analysis()
    loss_a.get_perf(loss, range(iterations))
    loss_a.plot(names=['DAgger Loss'],
                label='Loss',
                filename='./results/' + prefix + 'loss_plots.eps',
                ylims=[0, 1])

    js_a = Analysis()
    js_a.get_perf(js, range(iterations))
    js_a.plot(names=['DAgger'],
              label='J()',
              filename='./results/' + prefix + 'js_plots.eps')

    print "finished"
Example #13
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    print('Task Ready')
    exp.doEpisodes(2)
    print('mm 2:', task.reward)

    task.env.initMarioMode = 1
    exp.doEpisodes(1)
    print('mm 1:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print('mm 0:', task.reward)

    task.env.initMarioMode = 0
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 0, ld 5: ', task.reward)

    task.env.initMarioMode = 1
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 1, ld 5: ', task.reward)

    task.env.initMarioMode = 2
    task.env.levelDifficulty = 5
    exp.doEpisodes(1)
    print('mm 2, ld 5: ', task.reward)
    print("finished")
Example #14
import matplotlib.pyplot as plt
import numpy as np


def main():
    f = open('try_3.txt', 'w')
    agent = Ahude(IT, f)
    distances = np.zeros([1])
    data = np.zeros([1])
    num_help = np.zeros([1])
    mis_match = np.zeros([1])

    print('Task Ready')
    task = MarioTask(agent.name, initMarioMode=2)
    exp = EpisodicExperiment(task, agent)
    task.env.initMarioMode = 2
    task.env.levelDifficulty = 1
    if agent.initialTraining:
        exp.doEpisodes(2)
        agent.newModel()
        agent.saveModel()
    else:
        for i in range(ITERATIONS):  # ITERATIONS: module-level constant, not shown in the snippet
            print("ITERATION", i)
            f.write('ITERATION %i \n' % i)
            f.write('___________________________________________________\n')
            rewards = exp.doEpisodes(1)

            agent.updateModel()

            if agent._getName() == 'Ahude':
                num_help = np.vstack((num_help, np.array(agent.getNumHelp())))
                mis_match = np.vstack((mis_match, np.array(agent.getMismatch())))
                #agent.off = True
                #rewards = exp.doEpisodes(1)
                #agent.off = False

            size = len(rewards[0])
            distances = np.vstack((distances, np.array(rewards[0][size - 1])))
            data = np.vstack((data, np.array(agent.getNumData())))
            agent.reset()
            #agent.notComplete = False
            print("TRACK COMPLETE")

        # IPython.embed()
        f.close()

        plt.figure(2)
        plt.plot(data, distances)

        if agent._getName() == 'Ahude':
            plt.figure(1)
            plt.plot(data, num_help)
            plt.figure(3)
            plt.plot(mis_match)
        plt.show()
    #agent.saveModel()
    print("finished")