# ---- Hyperparameters for the inverse run ----
# `arg` is the shared configuration object created earlier in this script.
DISCOUNT_FACTOR = 0.99
arg.NUM_SAMPLES = 2
arg.NUM_EP = 200
arg.NUM_IT = 2  # number of iterations for gradient descent
arg.NUM_thetas = 1
arg.ADAM_LR = 0.25
arg.LR_STEP = 50
arg.LR_STOP = 0.1
arg.lr_gamma = 0.95
arg.PI_STD = 1

# NOTE: goal_radius_range used to be assigned [0.1, 0.3] and then immediately
# overwritten, and TERMINAL_VEL was assigned twice with the same value. Only
# the effective assignments are kept here.
arg.goal_radius_range = [0.15, 0.3]
# Terminal velocity(?): the norm(action) that is taken as a signal to stop.
# NOTE(review): the original note also mentions 0.1 here -- confirm which
# threshold is intended.
arg.TERMINAL_VEL = 0.025
arg.std_range = [0.02, 0.3, 0.02, 0.3]

arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35
number_updates = 100

# ---- Agent: convert the trained baselines model to a torch model ----
import policy_torch

baselines_mlp_model = TD3.load(
    'trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(
    baselines_mlp_model, layers=[512, 512], n_inputs=32)

# ---- Environment: load it with the same settings as in training ----
env = firefly_accac.FireflyAccAc(arg)

# ---- Set up the env for the inverse problem ----
# TODO: move this into a function of env.
env.agent_knows_phi = False
# Identifier of the run whose agent information is used below.
filename = '20191111-151539-12011329'

# Arguments saved with the trained agent.
# NOTE(review): torch.load unpickles the file; only load trusted files.
learning_arg = torch.load(
    '../firefly-monkey-data/data/20191111-151539_arg.pkl')
_saved_argument = learning_arg['argument']

DISCOUNT_FACTOR = _saved_argument['DISCOUNT_FACTOR']

# The three ranges below are hard-coded instead of being read from the saved
# arguments. To restore the saved values, use respectively:
#   learning_arg['argument']['gains_range']
#   learning_arg['argument']['std_range']
#   learning_arg['argument']['goal_radius_range']
gains_range = [1, 1, 1, 1]
std_range = [0.05, 0.05, 0.05, 0.05]
goal_radius_range = [0.9, 0.9]

# World and episode settings are copied from the saved arguments onto `arg`.
arg.WORLD_SIZE = _saved_argument['WORLD_SIZE']
arg.DELTA_T = _saved_argument['DELTA_T']
arg.EPISODE_TIME = _saved_argument['EPISODE_TIME']
arg.EPISODE_LEN = _saved_argument['EPISODE_LEN']

# Disabled alternative: derive the discount factor and ranges from the
# inverse-run log CSV instead of the values above.
# df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
#                  usecols=['discount_factor', 'process gain forward', 'process gain angular',
#                           'process noise std forward', 'process noise std angular',
#                           'obs gain forward', 'obs gain angular', 'obs noise std forward',
#                           'obs noise std angular', 'goal radius'])
#
# DISCOUNT_FACTOR = df['discount_factor'][0]
# gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
#                np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]
#
# std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
#              df['process noise std angular'].min(), df['process noise std angular'].max()]
# goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]