Example No. 1
# Run a set of individual experiments for each environment in Es.
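# Assumed context, not shown in this snippet: numpy is imported as np, lrp is
# an L_RP learning automaton, env / det_obj / mse are environment helpers, and
# Es, n, time_between and bestdepth are defined by the surrounding code.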
for k in range(len(Es)):
    # Generate an ensemble of n experiments
    for j in range(n):
        # reset the action probabilities.
        lrp.reset_actions()
        # Run a single experiment. Terminate early if it does not converge
        # within 10000 iterations.
        for _ in range(10000):
            # m is the next action: the automaton's prediction of the
            # object's depth.
            m = lrp.next_action()
            # req is the next detectable object depth.
            req = det_obj.request()
            # Query the environment; reward the automaton if m matches req,
            # otherwise penalize it.
            resp = env.response(m, req)
            if not resp:
                lrp.do_reward(m)
            else:
                lrp.do_penalty(m)
            if max(lrp.p) > 0.98:
                # Converged: more than 98% of the probability mass is on one
                # depth (indexed from 0). Record the winning depth and stop.
                bestdepth[np.argmax(lrp.p)] += 1
                break
        # After experiment number time_between, advance to the next
        # environment and point the detectable object at it.
        if j == time_between:
            mse.next_env()
            det_obj.set_env(mse.env_now())
            print("The desired vector is now: " + str(mse.env_now()))

    print("The probability vector is: " + str(bestdepth / sum(bestdepth)))
Example No. 2
    # Reset the display for the current depth: reposition the source and
    # clear the receiver markers.
    source.goto(-300, depths[k])
    receiver.clear()
    receiver1.clear()
    # Draw one arrow per action from the source out to each candidate depth
    # and label it with the corresponding entry of the desired vector.
    for i in range(num_actions):
        transmission[i].clear()
        transmission[i].color("green")
        transmission[i].shape("arrow")
        transmission[i].shapesize(.5, .5)
        transmission[i].penup()
        transmission[i].setpos(-300, depths[k])
        transmission[i].pendown()
        transmission[i].goto(-150, depths[i])
        transmission[i].write(mse.env_now()[i])

    det_obj.set_env(mse.env_now())
    first_uav.set_env(mse1.env_now())
    print("The desired vector is now: " + str(mse.env_now()))
    # lrp.a = tune.find_optimal_a(lrp, env, det_obj)
    # print("Optimized value for a is: " + str(lrp.a))
    lrp.a = 0.99999999999999
    lrp.b = 0.5
    # Per-depth counters of how often each automaton converges to that depth.
    bestdepth = np.zeros(num_actions)
    bestdepth1 = np.zeros(num_actions)
    current_best = 0
    current_best1 = 0
    for j in range(n):
        # Action-probability reset is commented out here, so the automaton
        # carries its probabilities over from the previous experiment.
        # lrp.reset_actions()
        count = 0
        # lrp.b = tune.find_optimal_b(lrp, env, det_obj)