# ---- inverse-problem hyperparameters ----
arg.NUM_SAMPLES = 2
arg.NUM_EP = 200
arg.NUM_IT = 2  # number of iterations for gradient descent
arg.NUM_thetas = 1
arg.ADAM_LR = 0.25
arg.LR_STEP = 50
arg.LR_STOP = 0.1
arg.lr_gamma = 0.95
arg.PI_STD = 1
arg.goal_radius_range = [0.15, 0.3]
arg.std_range = [0.02, 0.3, 0.02, 0.3]
arg.TERMINAL_VEL = 0.025  # norm(action) below this is taken as a stop signal
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35
number_updates = 100

# convert the stable-baselines agent to a torch model
import policy_torch
baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)

# load the environment, same as in training
env = firefly_accac.FireflyAccAc(arg)

# ---- set up the env for the inverse problem ----
# TODO: move this into a method of env
env.agent_knows_phi = False
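# Sanity check (illustrative sketch, not part of the original script): roll the
# converted torch agent through one episode. Assumes env follows the gym
# reset()/step() API and that agent is a torch module mapping the 32-d belief
# state to an action; adapt the tensor conversion if the actual policy_torch
# interface differs.
import torch
with torch.no_grad():
    belief = env.reset()
    for t in range(arg.EPISODE_LEN):
        action = agent(torch.as_tensor(belief, dtype=torch.float32))
        belief, reward, done, info = env.step(action)
        if done:
            break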
# load the arguments saved during training
learning_arg = torch.load('../firefly-monkey-data/data/20191111-151539_arg.pkl')
DISCOUNT_FACTOR = learning_arg['argument']['DISCOUNT_FACTOR']

# fixed ranges; the saved ranges are kept in the comments for reference
gains_range = [1, 1, 1, 1]            # learning_arg['argument']['gains_range']
std_range = [0.05, 0.05, 0.05, 0.05]  # learning_arg['argument']['std_range']
goal_radius_range = [0.9, 0.9]        # learning_arg['argument']['goal_radius_range']
# gains_range = learning_arg['argument']['gains_range']
# std_range = learning_arg['argument']['std_range']
# goal_radius_range = learning_arg['argument']['goal_radius_range']

arg.WORLD_SIZE = learning_arg['argument']['WORLD_SIZE']
arg.DELTA_T = learning_arg['argument']['DELTA_T']
arg.EPISODE_TIME = learning_arg['argument']['EPISODE_TIME']
arg.EPISODE_LEN = learning_arg['argument']['EPISODE_LEN']

# alternative: recover the ranges from a logged csv
# df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
#                  usecols=['discount_factor', 'process gain forward', 'process gain angular',
#                           'process noise std forward', 'process noise std angular',
#                           'obs gain forward', 'obs gain angular', 'obs noise std forward',
#                           'obs noise std angular', 'goal radius'])
# DISCOUNT_FACTOR = df['discount_factor'][0]
# gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
#                np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]
# std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
#              df['process noise std angular'].min(), df['process noise std angular'].max()]
# goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]

env = gym.make('FireflyTorch-v0')  # ,PROC_NOISE_STD,OBS_NOISE_STD
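# Quick smoke test (illustrative, not in the original): print the restored
# arguments and take one random step in the registered env. Assumes the
# standard gym 4-tuple step API for 'FireflyTorch-v0'.
print('DISCOUNT_FACTOR =', DISCOUNT_FACTOR)
print('dt =', arg.DELTA_T, 'episode length =', arg.EPISODE_LEN)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())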
from monkey_functions import *
from Config import Config

arg = Config()
arg.presist_phi = False
arg.agent_knows_phi = False
arg.goal_distance_range = [0.4, 1]
arg.gains_range = [0.05, 1.5, pi/4, pi/1]
arg.goal_radius_range = [0.05, 0.3]
arg.std_range = [0.08, 0.3, pi/80, pi/80*5]
arg.mag_action_cost_range = [0.0001, 0.001]
arg.dev_action_cost_range = [0.0001, 0.005]
arg.dev_v_cost_range = [0.1, 0.5]
arg.dev_w_cost_range = [0.1, 0.5]
arg.TERMINAL_VEL = 0.1
arg.DELTA_T = 0.1
arg.EPISODE_LEN = 100
DISCOUNT_FACTOR = 0.99
arg.sample = 100
arg.batch = 70
# arg.NUM_SAMPLES = 1
# arg.NUM_EP = 1
arg.NUM_IT = 1
arg.NUM_thetas = 1
arg.ADAM_LR = 0.0002
arg.LR_STEP = 20
arg.LR_STOP = 0.5
arg.lr_gamma = 0.95
arg.PI_STD = 1
arg.cost_scale = 1
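# Sketch of how these optimizer settings are typically consumed (everything
# except the arg.* fields is hypothetical; the real inverse loop lives
# elsewhere in the repo): Adam on the estimated theta with a StepLR schedule,
# decaying by lr_gamma every LR_STEP iterations. LR_STOP is treated here as an
# absolute learning-rate floor, which is an assumption.
import torch
theta = torch.zeros(9, requires_grad=True)  # hypothetical parameter vector
optimizer = torch.optim.Adam([theta], lr=arg.ADAM_LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=arg.LR_STEP, gamma=arg.lr_gamma)
for it in range(arg.NUM_IT):
    optimizer.zero_grad()
    loss = (theta ** 2).sum()  # placeholder for the actual inverse loss
    loss.backward()
    optimizer.step()
    if scheduler.get_last_lr()[0] > arg.LR_STOP:
        scheduler.step()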
# acc mdp model
env = firefly_mdp.FireflyMDP(arg)
model = TD3.load('trained_agent/mdp_noise_1000000_2_9_18_8')
model.set_env(env)

# 1d real model
# easy parameter ranges
arg.gains_range = [0.99, 1]
arg.goal_radius_range = [25, 25.3]
arg.std_range = [0.5, 0.51, 49.5, 50]
arg.mag_action_cost_range = [0.00001, 0.000011]
arg.dev_action_cost_range = [0.00001, 0.000012]
arg.TERMINAL_VEL = 20
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 50
arg.training = True
arg.presist_phi = False
arg.agent_knows_phi = False
env = ffacc_real.FireflyTrue1d_real(arg)

# hard parameter ranges
arg.gains_range = [0.1, 5]
arg.goal_radius_range = [1, 50]
arg.std_range = [0.01, 2, 0.01, 100]
arg.mag_action_cost_range = [0.00001, 0.0001]
arg.dev_action_cost_range = [0.00001, 0.00005]
arg.TERMINAL_VEL = 20
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 50
arg.training = True
arg.presist_phi = False
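# Possible continuation (sketch, not from the original script): rebuild the env
# with the hard ranges and fine-tune the loaded TD3 agent via stable-baselines'
# learn(). The timestep budget and checkpoint name are placeholders.
model.set_env(ffacc_real.FireflyTrue1d_real(arg))
model.learn(total_timesteps=100000)
model.save('trained_agent/ffacc_real_1d_hard')  # hypothetical checkpoint name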
arg.init_action_noise = 0.5
arg.goal_distance_range = [0.2, 1]
arg.mag_action_cost_range = [0.1, 1.]
arg.dev_action_cost_range = [0.1, 1.]
arg.dev_v_cost_range = [0.1, 1.]
arg.dev_w_cost_range = [0.1, 1.]
# arg.goal_distance_range = [0.01, 0.99]
arg.gains_range = [0.35, 0.45, pi / 2 - 0.1, pi / 2 + 0.1]
# arg.goal_radius_range = [0.07, 0.2]
arg.std_range = [0.1, 0.7, 0.1, 0.7]
# arg.mag_action_cost_range = [0.0001, 0.0005]
# arg.dev_action_cost_range = [0.0001, 0.0005]
arg.REWARD = 100
arg.TERMINAL_VEL = 0.1
arg.DELTA_T = 0.1
arg.EPISODE_LEN = 40
arg.training = True
arg.presist_phi = False
arg.agent_knows_phi = True
arg.cost_scale = 1

env = ffacc_real.FireflyFinal2(arg)
env.no_skip = True

modelname = None
# modelname = 're_re_skipcostscale_200000_4_15_13_16_17_19'
note = 're'
from stable_baselines3 import SAC

# # 1d test
# arg.initial_uncertainty_range = [0, 1]
# env = ffacc_real.Simple1d(arg)
# env.no_skip = False
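# Training entry point (sketch; the timestep budget and save path are
# placeholders, not values from the original run). Trains a fresh SAC agent on
# FireflyFinal2, or resumes from the named checkpoint when modelname is set.
if modelname is None:
    model = SAC('MlpPolicy', env, verbose=1)
else:
    model = SAC.load('trained_agent/' + modelname, env=env)
model.learn(total_timesteps=200000)
model.save('trained_agent/' + note + '_final2')  # hypothetical naming scheme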