示例#1
0
def estimate_log_evidence_pm_pendulum(load_path="./result_apply/",
                                      save_path="./result_apply/",
                                      loop_num=0,
                                      i=5):
    """Print and return the log-evidence change caused by one held-out row.

    A ``PendulumPM`` model is built from the real-world samples with row
    ``i`` removed. Its own log evidence is then compared with the log
    evidence it reports for the complete (nothing removed) sample set.

    Args:
        load_path: Prefix concatenated as-is in front of
            ``real_world_samples_input.csv`` and
            ``real_world_samples_output.csv``, so it should end with a
            path separator.
        save_path: Not used by this function; kept so existing callers
            that pass it keep working.
        loop_num: Forwarded to ``gather_sample.gather_real_world_sample``.
        i: Index of the row (axis 0) removed from both sample arrays.

    Returns:
        ``log_evidence_of_new_data(X_full, Y_full) - log_evidence()``.
    """
    input_csv = load_path + 'real_world_samples_input.csv'
    output_csv = load_path + 'real_world_samples_output.csv'

    # Model is trained on the samples with row ``i`` left out.
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.delete(np.loadtxt(input_csv, delimiter=','), i, 0)
    Y = np.delete(np.loadtxt(output_csv, delimiter=','), i, 0)
    pm_class = pendulum_pm.PendulumPM(X, Y)

    # The samples are gathered and read a second time, as before.  The
    # full set is loaded once; the reduced set is derived from it instead
    # of re-reading the same files (np.delete returns a new array).
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X2 = np.loadtxt(input_csv, delimiter=',')
    Y2 = np.loadtxt(output_csv, delimiter=',')
    X = np.delete(X2, i, 0)
    Y = np.delete(Y2, i, 0)

    # NOTE(review): each evidence value is now computed once and reused;
    # this assumes log_evidence()/log_evidence_of_new_data() return the
    # same value on repeated calls and have no side effects -- confirm.
    prev_log_evidence = pm_class.log_evidence()
    print("prev_log_evidence =", prev_log_evidence)
    print("debug_log_evidence =", pm_class.log_evidence_of_new_data(X, Y))
    new_log_evidence = pm_class.log_evidence_of_new_data(X2, Y2)
    print("new_log_evidence =", new_log_evidence)

    diff = new_log_evidence - prev_log_evidence
    print("diff =", diff)
    return diff
def estimate_log_evidence_pm_pendulum(load_path="./result_apply/",
                             save_path="./result_apply/",
                             loop_num=0):
    """Compare model log evidence before and after adding simulation samples.

    A ``PendulumPM`` model is built from the real-world samples.  The
    real-world samples are then gathered and read again, the samples under
    ``policy<loop_num>/simulation2/`` are appended to them, and the model's
    log evidence for that combined set is computed.  Both values are
    printed and written to CSV files below ``save_path``.

    Args:
        load_path: Prefix concatenated as-is in front of every input file
            name, so it should end with a path separator.
        save_path: Prefix concatenated as-is in front of every output file
            name.
        loop_num: Forwarded to ``gather_sample.gather_real_world_sample``
            and used to form the ``policy<loop_num>`` directory name.

    Returns:
        The log evidence reported for the combined (real + simulation)
        samples, taken from the saved array.
    """
    input_csv = load_path + 'real_world_samples_input.csv'
    output_csv = load_path + 'real_world_samples_output.csv'
    policy_dir = "policy" + str(loop_num)
    sim_load_dir = load_path + policy_dir + '/simulation2/'

    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.loadtxt(input_csv, delimiter=',')
    Y = np.loadtxt(output_csv, delimiter=',')
    pm_class = pendulum_pm.PendulumPM(X, Y)

    # Samples are gathered and re-read after the model has been built.
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.loadtxt(input_csv, delimiter=',')
    Y = np.loadtxt(output_csv, delimiter=',')

    # Real-world rows first, simulation rows appended below them.
    X2 = np.r_[X, np.loadtxt(sim_load_dir + 'simulation_samples_input.csv',
                             delimiter=',')]
    Y2 = np.r_[Y, np.loadtxt(sim_load_dir + 'simulation_samples_output.csv',
                             delimiter=',')]

    # NOTE(review): each evidence value is now computed once and reused;
    # this assumes log_evidence()/log_evidence_of_new_data() return the
    # same value on repeated calls and have no side effects -- confirm.
    prev_log_evidence = pm_class.log_evidence()
    print("prev_log_evidence =", prev_log_evidence)
    print("debug_log_evidence =", pm_class.log_evidence_of_new_data(X, Y))
    new_log_evidence = pm_class.log_evidence_of_new_data(X2, Y2)
    print("new_log_evidence =", new_log_evidence)

    temp_data = np.array([prev_log_evidence, new_log_evidence])
    np.savetxt(save_path + policy_dir + '/simulation2/log_evidence.csv',
               temp_data, delimiter=',')
    np.savetxt(save_path + policy_dir + '/log_evidence_for_learn2.csv',
               np.array([prev_log_evidence]), delimiter=',', fmt='%.6f')
    return temp_data[1]
示例#3
0
def custom_pendulum_wrap(
    env,
    model_switch=0,
    dirname="./data_debug/",
    filename1="debug_input.csv",
    filename2="debug_output.csv",
):
    """Wrap ``env`` with the real dynamics and, optionally, a learned model.

    Behaviour by ``model_switch``:
        * ``<= 0``: only the real dynamics are wrapped.
        * ``1``: the NPM model is built and wrapped.
        * ``2``: the PM model is built and wrapped.
        * ``3``: both are built; PM is wrapped when its log evidence is
          strictly greater than NPM's, otherwise NPM.
        * ``4``: both are built; PM is wrapped when its 10-fold CV score is
          strictly greater than NPM's, otherwise NPM.
        * ``5``: data is loaded and ``5`` is returned, but no learned model
          is wrapped.
        * ``11`` / ``111``: the NPM model is built and wrapped through
          ``wrap_env_pilco``.
        * values ``>= 9`` use ``pendulum_real_model_pilco`` for the real
          dynamics and no initial state file.

    Args:
        env: Environment handed to every ``wrap_env`` call.
        model_switch: Mode selector, see above.
        dirname: Prefix concatenated as-is in front of the data file names.
        filename1: Input-sample CSV name.
        filename2: Output-sample CSV name.

    Returns:
        The identifier of the model actually selected (``0`` when none).
    """
    # --- 1: real dynamics ---------------------------------------------
    dynamics_module = (pendulum_real_model if model_switch < 9
                       else pendulum_real_model_pilco)
    real_dynamics = dynamics_module.PendulumDynamics()
    real_dynamics.logger_parameter()
    real_dynamics.wrap_env(env)

    # --- 2: load samples and build the candidate models -----------------
    if model_switch > 0:
        dataX = np.loadtxt(dirname + filename1, delimiter=',')
        dataY = np.loadtxt(dirname + filename2, delimiter=',')
        init_s = (np.loadtxt(dirname + 'current_state.csv', delimiter=',')
                  if model_switch < 9 else None)
    if model_switch in (1, 3, 4, 11, 111):
        npm_class = pendulum_npm.PendulumNPM(dataX, dataY, init_state=init_s)
        npm_class.logger_parameter()
    if model_switch in (2, 3, 4):
        pm_class = pendulum_pm.PendulumPM(dataX, dataY, init_state=init_s)
        pm_class.logger_parameter()

    # --- 3: decide which model fits best --------------------------------
    best_fit_model = 0
    if model_switch in (1, 2, 5):
        best_fit_model = model_switch
    elif model_switch == 3:
        # Positive gap means the PM model has the larger log evidence.
        evidence_gap = pm_class.log_evidence() - npm_class.log_evidence()
        best_fit_model = 2 if evidence_gap > 0. else 1
        logger.log("log_evidence_diff =", evidence_gap)
    elif model_switch == 4:
        npm_score = npm_class.k_fold_cv(dataX, dataY, k=10)
        pm_score = pm_class.k_fold_cv(dataX, dataY, k=10)
        score_gap = pm_score - npm_score
        best_fit_model = 2 if score_gap > 0. else 1

    # --- 4: wrap the environment with the chosen model ------------------
    if best_fit_model == 1:
        npm_class.wrap_env(env)
    elif best_fit_model == 2:
        pm_class.wrap_env(env)

    if model_switch in (11, 111):
        # Only for the comparison with PILCO.  Per the original note, the
        # real-world velocity in this setting exceeds the max_speed used
        # to speed up learning for the main results, hence the wider clip.
        npm_class.thdot_clip_value = 50.
        npm_class.wrap_env_pilco(env)
        npm_class.logger_parameter()
        best_fit_model = model_switch

    logger.log("flag in custom_pendulum_wrap =", model_switch)
    logger.log("actually_selected_model=", best_fit_model)
    # init_s only exists when samples were loaded (model_switch > 0).
    if (best_fit_model + model_switch) > 0:
        logger.log("init_state=", init_s)
    return best_fit_model
示例#4
0
    real_dynamics.wrap_env(env.env)


    dn ="./data_debug2/"
    fn1="debug_input.csv"
    fn2="debug_output.csv"
    #pendulum_real_model.generate_test_samples(dirname=dn,filename1=fn1,filename2=fn2)
    dataX = np.loadtxt(dn+fn1, delimiter=',')
    dataY = np.loadtxt(dn+fn2, delimiter=',')

    test_class = PendulumPM(dataX,dataY)
    test_class.wrap_env(env.env)
    test_class.logger_parameter()

    import pendulum_pm # add
    test2_class = pendulum_pm.PendulumPM(dataX,dataY) # add
    test2_class.wrap_env(env.env)  # add
    test2_class.logger_parameter() # add  

    ''' # add
    episode_count=0
    while episode_count<1:
        env.reset()
        env.env.state= np.array([[np.pi],[0.]])

        while True:
            ac = env.action_space.sample()
            ob, rew, new, _ = env.step(ac)
            env.render()
            if new:
                episode_count +=1