def estimate_log_evidence_pm_pendulum(load_path="./result_apply/", save_path="./result_apply/", loop_num=0, i=5):
    """Return how much the PM log evidence changes when row ``i`` is restored.

    A ``pendulum_pm.PendulumPM`` model is built from the real-world samples
    with row ``i`` removed. Its log evidence is then compared with the log
    evidence it assigns to the full sample set (row ``i`` included).

    Args:
        load_path: Prefix (string-concatenated, so it must end with a path
            separator) of the directory holding
            ``real_world_samples_input.csv`` / ``real_world_samples_output.csv``.
        save_path: Accepted for interface compatibility; not used here.
        loop_num: Forwarded to ``gather_sample.gather_real_world_sample``.
        i: Index of the row (axis 0) held out when building the model.

    Returns:
        ``log_evidence_of_new_data(full data) - log_evidence()``.

    NOTE(review): another definition with the same name appears further down
    in the visible source; if both live in the same module the later one
    replaces this one at import time -- confirm which is intended.
    """
    input_csv = load_path + 'real_world_samples_input.csv'
    output_csv = load_path + 'real_world_samples_output.csv'

    # Build the model on the samples with row ``i`` left out.
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.delete(np.loadtxt(input_csv, delimiter=','), i, 0)
    Y = np.delete(np.loadtxt(output_csv, delimiter=','), i, 0)
    pm_class = pendulum_pm.PendulumPM(X, Y)

    # Gather again (presumably this rewrites the CSVs -- TODO confirm) and
    # read the files once: the full arrays are X2/Y2, and the held-out
    # versions are derived from them (np.delete returns a copy).
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X2 = np.loadtxt(input_csv, delimiter=',')
    Y2 = np.loadtxt(output_csv, delimiter=',')
    X = np.delete(X2, i, 0)
    Y = np.delete(Y2, i, 0)

    # Each evidence value is evaluated exactly once and reused.
    # NOTE(review): assumes log_evidence()/log_evidence_of_new_data() are
    # deterministic and side-effect free -- confirm against PendulumPM.
    prev_log_evidence = pm_class.log_evidence()
    print("prev_log_evidence =", prev_log_evidence)
    print("debug_log_evidence =", pm_class.log_evidence_of_new_data(X, Y))
    new_log_evidence = pm_class.log_evidence_of_new_data(X2, Y2)
    print("new_log_evidence =", new_log_evidence)

    diff = new_log_evidence - prev_log_evidence
    print("diff =", diff)
    return diff
def estimate_log_evidence_pm_pendulum(load_path="./result_apply/", save_path="./result_apply/", loop_num=0):
    """Score real-world plus simulated samples under the PM model and save the result.

    A ``pendulum_pm.PendulumPM`` model is built from the real-world samples.
    The real-world samples are then stacked (``np.r_``) with the simulated
    samples stored under ``policy<loop_num>/simulation2/`` and the model's
    log evidence for that combined set is computed.

    Files written (paths are built by string concatenation, so ``save_path``
    must end with a path separator):
        * ``policy<loop_num>/simulation2/log_evidence.csv`` -- two values:
          the model's own log evidence and the log evidence of the combined
          data.
        * ``policy<loop_num>/log_evidence_for_learn2.csv`` -- the model's own
          log evidence, formatted with ``%.6f``.

    Args:
        load_path: Prefix of the directory the sample CSVs are read from.
        save_path: Prefix of the directory the result CSVs are written to.
        loop_num: Forwarded to ``gather_sample.gather_real_world_sample`` and
            used to select the ``policy<loop_num>`` sub-directory.

    Returns:
        The log evidence of the combined data (second element of the saved
        array).
    """
    real_input_csv = load_path + 'real_world_samples_input.csv'
    real_output_csv = load_path + 'real_world_samples_output.csv'
    policy_dir = "policy" + str(loop_num)

    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.loadtxt(real_input_csv, delimiter=',')
    Y = np.loadtxt(real_output_csv, delimiter=',')
    pm_class = pendulum_pm.PendulumPM(X, Y)

    # Gather again and reload before stacking with the simulated samples
    # (presumably the gather call rewrites the CSVs -- TODO confirm).
    gather_sample.gather_real_world_sample(loop_num=loop_num)
    X = np.loadtxt(real_input_csv, delimiter=',')
    Y = np.loadtxt(real_output_csv, delimiter=',')
    X2 = np.r_[X, np.loadtxt(load_path + policy_dir + '/simulation2/simulation_samples_input.csv', delimiter=',')]
    Y2 = np.r_[Y, np.loadtxt(load_path + policy_dir + '/simulation2/simulation_samples_output.csv', delimiter=',')]

    # Each evidence value is evaluated exactly once and reused for printing,
    # saving and returning.
    # NOTE(review): assumes log_evidence()/log_evidence_of_new_data() are
    # deterministic and side-effect free -- confirm against PendulumPM.
    prev_log_evidence = pm_class.log_evidence()
    print("prev_log_evidence =", prev_log_evidence)
    print("debug_log_evidence =", pm_class.log_evidence_of_new_data(X, Y))
    new_log_evidence = pm_class.log_evidence_of_new_data(X2, Y2)
    print("new_log_evidence =", new_log_evidence)

    temp_data = np.array([prev_log_evidence, new_log_evidence])
    np.savetxt(save_path + policy_dir + '/simulation2/log_evidence.csv', temp_data, delimiter=',')
    np.savetxt(save_path + policy_dir + '/log_evidence_for_learn2.csv', np.array([prev_log_evidence]), delimiter=',', fmt='%.6f')
    return temp_data[1]
def custom_pendulum_wrap(
    env,
    model_switch=0,
    dirname="./data_debug/",
    filename1="debug_input.csv",
    filename2="debug_output.csv",
):
    """Wrap ``env`` with the reference dynamics and, optionally, a learned model.

    The reference dynamics are always applied first. Depending on
    ``model_switch`` a learned model is then built from the CSV data in
    ``dirname`` and wrapped on top:

        * ``0``        -- reference dynamics only.
        * ``1``        -- NPM model.
        * ``2``        -- PM model.
        * ``3``        -- PM if its log evidence exceeds the NPM's, else NPM.
        * ``4``        -- PM if its 10-fold CV score exceeds the NPM's, else NPM.
        * ``5``        -- data is loaded but no learned model is wrapped.
        * ``11``/``111`` -- NPM wrapped through ``wrap_env_pilco`` (PILCO
          comparison only); the PILCO reference dynamics are used because
          ``model_switch >= 9``.

    Args:
        env: Environment object passed to the various ``wrap_env`` calls.
        model_switch: Selector described above.
        dirname: Directory prefix (string-concatenated) of the data files.
        filename1: Input-sample CSV file name.
        filename2: Output-sample CSV file name.

    Returns:
        The identifier of the model that was selected: ``1`` or ``2`` for
        the NPM/PM choice, ``model_switch`` itself for ``5``/``11``/``111``,
        and ``0`` otherwise.
    """
    # 1 # reference dynamics
    dynamics_module = pendulum_real_model if model_switch < 9 else pendulum_real_model_pilco
    true_dynamics = dynamics_module.PendulumDynamics()
    true_dynamics.logger_parameter()
    true_dynamics.wrap_env(env)

    # 2 # load data and build the candidate models
    if model_switch > 0:
        inputs = np.loadtxt(dirname + filename1, delimiter=',')
        targets = np.loadtxt(dirname + filename2, delimiter=',')
        initial_state = (
            np.loadtxt(dirname + 'current_state.csv', delimiter=',')
            if model_switch < 9
            else None
        )

    if model_switch in (1, 3, 4, 11, 111):
        npm_model = pendulum_npm.PendulumNPM(inputs, targets, init_state=initial_state)
        npm_model.logger_parameter()

    if model_switch in (2, 3, 4):
        pm_model = pendulum_pm.PendulumPM(inputs, targets, init_state=initial_state)
        pm_model.logger_parameter()

    # 3 # model selection
    best_fit_model = 0
    if model_switch in (1, 2, 5):
        best_fit_model = model_switch
    elif model_switch == 3:
        # Positive difference means the PM explains the data better.
        evidence_gap = pm_model.log_evidence() - npm_model.log_evidence()
        best_fit_model = 2 if evidence_gap > 0. else 1
        logger.log("log_evidence_diff =", evidence_gap)
    elif model_switch == 4:
        npm_score = npm_model.k_fold_cv(inputs, targets, k=10)
        pm_score = pm_model.k_fold_cv(inputs, targets, k=10)
        best_fit_model = 2 if (pm_score - npm_score) > 0. else 1

    # 4 # wrap the selected model
    if best_fit_model == 1:
        npm_model.wrap_env(env)
    if best_fit_model == 2:
        pm_model.wrap_env(env)
    if model_switch in (11, 111):  # only for comparison with pilco
        # In this case the resulting real-world velocity exceeds the
        # max_speed given for accelerating the learning process in the
        # main results, hence the wider clip value.
        npm_model.thdot_clip_value = 50.
        npm_model.wrap_env_pilco(env)
        npm_model.logger_parameter()
        best_fit_model = model_switch

    logger.log("flag in custom_pendulum_wrap =", model_switch)
    logger.log("actually_selected_model=", best_fit_model)
    if best_fit_model + model_switch > 0:
        logger.log("init_state=", initial_state)
    return best_fit_model
real_dynamics.wrap_env(env.env) dn ="./data_debug2/" fn1="debug_input.csv" fn2="debug_output.csv" #pendulum_real_model.generate_test_samples(dirname=dn,filename1=fn1,filename2=fn2) dataX = np.loadtxt(dn+fn1, delimiter=',') dataY = np.loadtxt(dn+fn2, delimiter=',') test_class = PendulumPM(dataX,dataY) test_class.wrap_env(env.env) test_class.logger_parameter() import pendulum_pm # add test2_class = pendulum_pm.PendulumPM(dataX,dataY) # add test2_class.wrap_env(env.env) # add test2_class.logger_parameter() # add ''' # add episode_count=0 while episode_count<1: env.reset() env.env.state= np.array([[np.pi],[0.]]) while True: ac = env.action_space.sample() ob, rew, new, _ = env.step(ac) env.render() if new: episode_count +=1