示例#1
0
    epsilon = 10**(-12)
    samples = collect_samples(sim)
    if len(samples) < 5000:
        samples += collect_samples(sim)
    discount = .8

    
    # construct a graph from the samples
    graph = pvf.construct_graph(samples, Simulator.states)

    basis = pvf.create_basis_function(graph, Simulator.states,
                                      Simulator.actions, k)

    policy = initialize_policy(0.0, discount, basis)

    final_policy, all_policies = lspi.lspi(maxiter, epsilon,
                                           samples, policy)

    value_policy = initialize_value_function_policy(sim)
    
    plt.figure()
    plt.subplot(2,2,1)
    approxV = display_qvalues(sim, final_policy)
    plt.title('Estimated Value Function')
    plt.subplot(2,2,2)
    display_qvalues(sim, final_policy, dim=1)
    plt.title('Estimated Value Function')

    #plt.subplot(1,2,2)
    #display_policy(sim, final_policy)
    #plt.show()
示例#2
0
            if (k, episode) not in data:
                data[(k, episode)] = []
            samples = rooms.collect_samples(sim, maxepisodes=episode, maxsteps=max_steps)

            graph = pvf.construct_graph(samples, sim.states)
            try:
                basis = pvf.create_basis_function(graph, sim.states,
                                                      sim.actions, k)
            except:
                print "Couldn't compute basis function for this data"
                continue
                    
            policy = rooms.initialize_policy(0.0, discount, basis)

            final_policy = lspi.lspi(maxiter, epsilon,
                                         samples, policy)[0]

            for n in range(num_tries):                
                execution_data = rooms.test_execution(sim, final_policy, maxsteps=max_steps)
                

                data[(k, episode)].append(execution_data)

    for episode in range(start_episode, end_episode+1, step_episode):
        for k in range(start_k, end_k+1, step_k):
            total_steps = 0
            data_list = data.get((k, episode), [])
            for data_point in data_list:
                total_steps += data_point[2]

            if k not in final_data: