示例#1
0
    print "#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#"

    reward = Reward()
    endcondition = EndCondition()

    maxsteps = b

    # Initial budget, derived from the step limit (maxsteps - 2)
    init_budget = maxsteps - 2

    for rp in risk_profile:
        print "#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#"

        gridworld = GridWorld(xlim, ylim)
        grid_states = np.asarray(gridworld.getStates())
        actions = np.asarray(gridworld.getActions())

        # Set up the agent
        agent = BudgetAgent(grid_states, actions, blim)
        agent.setAgent(initial_position)

        # alpha,gamma,epsilon
        agent.setQLearning(0.3, 0.8, 1.0)

        log = {'rewards': [], 'steps': []}

        for i in range(0, episodes):
            print "---------------------------------------------------------------------------------------------"
            print "START EPISODE"