Example #1
import numpy as np

# `robot_reset`, `man_controller`, `utils`, and `CTRL_NORM` come from the
# surrounding module of the source project.
def get_active_exp(env, threshold, ae, xm, xs, render, take_max=False, max_act_steps=20):

    def state_error(state):
        # Flatten the goal-based observation and normalize it with the training
        # mean/std before querying the autoencoder's reconstruction error.
        x = np.concatenate((state["observation"],
                            state["achieved_goal"],
                            state["desired_goal"])).reshape((1, -1))
        return ae.error((x - xm) / xs)

    # Baseline: average reconstruction error over 20 random initial states.
    err_avg = 0
    for i in range(20):
        env.reset()
        state = robot_reset(env)
        err_avg += state_error(state)
    err_avg /= 20

    state = env.reset()
    error = state_error(state)

    if not take_max:
        # Resample initial states until one looks novel enough, i.e. its
        # error exceeds `threshold` times the baseline.
        tried = 0
        while not error > threshold * err_avg:
            tried += 1  # counts resampled initial states (useful for debugging)
            env.reset()
            state = robot_reset(env)
            error = state_error(state)
        new_states, new_acts = man_controller.get_demo(env, state, CTRL_NORM, render)

        return new_states, new_acts

    else:
        # Sample `max_act_steps` initial states, remember each one, and keep
        # the one with the highest reconstruction error.
        errs_states = []
        for k in range(max_act_steps):
            env.reset()
            state = robot_reset(env)
            error = state_error(state)
            s, g = utils.save_state(env)
            errs_states.append([s, g, error])

        max_key = max(range(len(errs_states)), key=lambda i: errs_states[i][2])

        # Restore the most novel state and take a no-op step to obtain an observation.
        new_env = utils.set_state(env, errs_states[max_key][0], errs_states[max_key][1])
        state, *_ = new_env.step(np.zeros(4))

        new_states, new_acts = man_controller.get_demo(new_env, state, CTRL_NORM, render)

        return new_states, new_acts
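The core of this function is a simple novelty test: a state's autoencoder reconstruction error is compared against the average error over random resets. Below is a self-contained sketch of that criterion, with a stub linear autoencoder standing in for ae; the stub, the state dimension, and the threshold value are all illustrative assumptions, not from the source.

import numpy as np

class StubAE:
    """Stand-in for `ae`: projects inputs onto a random linear subspace and
    reports the mean squared reconstruction error, mirroring the .error()
    calls above."""
    def __init__(self, dim, code=8, seed=0):
        W = np.random.default_rng(seed).standard_normal((dim, code))
        self.P = W @ np.linalg.pinv(W)  # projection onto the column space of W
    def error(self, x):
        return float(np.mean((x - x @ self.P) ** 2))

dim = 31                                        # illustrative flattened state size
ae = StubAE(dim)
xm, xs = np.zeros((1, dim)), np.ones((1, dim))  # stand-in normalization stats

# Average error over 20 random states, then flag a state as "novel" if its
# error exceeds threshold * average -- the same test get_active_exp applies.
rng = np.random.default_rng(1)
err_avg = np.mean([ae.error((rng.standard_normal((1, dim)) - xm) / xs)
                   for _ in range(20)])
x = rng.standard_normal((1, dim))
is_novel = ae.error((x - xm) / xs) > 1.5 * err_avg  # threshold = 1.5, illustrative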
Example #2
import numpy as np

def get_active_exp(env, threshold, ae, xm, xs, render):

    def state_error(state):
        # Flatten the goal-based observation, normalize with the training
        # mean/std, and return the autoencoder's reconstruction error.
        x = np.concatenate((state["observation"],
                            state["achieved_goal"],
                            state["desired_goal"])).reshape((1, -1))
        return ae.error((x - xm) / xs)

    # Baseline: average reconstruction error over 20 random initial states.
    err_avg = 0
    for i in range(20):
        err_avg += state_error(env.reset())
    err_avg /= 20

    state = env.reset()
    error = state_error(state)

    # Resample initial states until one exceeds the novelty threshold.
    tried = 0
    while not error > threshold * err_avg:
        tried += 1  # counts resampled initial states (useful for debugging)
        state = env.reset()
        error = state_error(state)

    new_states, new_acts = man_controller.get_demo(env, state, render)

    return new_states, new_acts
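Both variants assume xm and xs are per-dimension normalization statistics of the flattened training states. A minimal sketch of how they could be computed; the dataset and its shape are illustrative stand-ins, not from the source.

import numpy as np

# Stand-in for a dataset of flattened (observation, achieved_goal,
# desired_goal) vectors collected from earlier demonstrations.
X = np.random.standard_normal((1000, 31))

xm = X.mean(axis=0, keepdims=True)        # per-dimension mean, shape (1, dim)
xs = X.std(axis=0, keepdims=True) + 1e-8  # per-dimension std (avoid divide-by-zero)

X_norm = (X - xm) / xs                    # the normalization the error calls apply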
Example #3
def get_experience(eps, env):
    # Collect `eps` expert demonstrations and concatenate their transitions.
    states, actions = [], []
    for ep in range(eps):
        state = env.reset()
        new_states, new_acts = man_controller.get_demo(env, state)
        states += new_states
        actions += new_acts

    return states, actions
Example #4
def get_experience(eps, env, render=False):
    # Same as Example #3, but with optional rendering and the module-level
    # `CTRL_NORM` constant passed through to the expert controller.
    states, actions = [], []
    for ep in range(eps):
        state = env.reset()
        new_states, new_acts = man_controller.get_demo(env, state, CTRL_NORM, render)
        states += new_states
        actions += new_acts

    return states, actions
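The (state, action) pairs returned by get_experience are the usual input to behavior cloning. A minimal numpy sketch of such a fit, with made-up shapes and random stand-in data (not from the source):

import numpy as np

states = np.random.standard_normal((500, 31))   # stand-in for collected states
actions = np.random.standard_normal((500, 4))   # stand-in for expert actions

# Least-squares linear policy: find W minimizing ||states @ W - actions||^2.
W, *_ = np.linalg.lstsq(states, actions, rcond=None)
mse = float(np.mean((states @ W - actions) ** 2))
print("behavior-cloning training MSE:", mse)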
Example #5
import time

# `try_complete`, `robot_reset`, `man_controller`, `CTRL_NORM`,
# `ACTIVE_ERROR_THR`, and `RENDER_TEST` come from the surrounding module.
def get_active_exp2(env, avg_error_trainset, model, ae, xm, xs, am, ast, render,
                    take_max=False, max_act_steps=20):
    # Let the current policy act from fresh initial states until it fails,
    # i.e. until try_complete reports a state it cannot handle.
    succeeded = True
    while succeeded:
        env.reset()
        state = robot_reset(env)
        succeeded, env, state, error = try_complete(
            model, ae, avg_error_trainset * ACTIVE_ERROR_THR,
            env, xm, xs, am, ast, render=RENDER_TEST)

    # Here we have the env and the state where the robot doesn't know what to do.
    time.sleep(1.)
    new_states, new_acts = man_controller.get_demo(env, state, CTRL_NORM, render)

    # A demo longer than 100 steps means the expert controller failed; rather
    # than waste the query, retry (recursively) until a demo succeeds.
    while len(new_states) > 100:
        new_states, new_acts = get_active_exp2(
            env, avg_error_trainset, model, ae, xm, xs, am, ast, render,
            take_max=False, max_act_steps=20)

    return new_states, new_acts
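The trailing retry loop calls get_active_exp2 recursively, so repeated expert failures grow the call stack. The same effect can be had iteratively; a sketch under the assumption that one query-and-demo attempt is factored into a callable (retry_until_short_demo, one_attempt, and max_demo_len are hypothetical names, not from the source):

def retry_until_short_demo(one_attempt, max_demo_len=100):
    # `one_attempt` is assumed to wrap one body of get_active_exp2: find a
    # failure state, then request an expert demo. Retry until the demo is
    # short enough to count as a success.
    while True:
        new_states, new_acts = one_attempt()
        if len(new_states) <= max_demo_len:
            return new_states, new_acts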