# --- Generation of the evaluation-environment dataset ---
log_dir = "./MultiEnv/Data"

# NOTE(review): a TrainEnv generation pass (TrainEnvNum = 500) used to live
# here, mirroring the loop below but writing to log_dir/TrainEnv/env_<i>.pkl
# and log_dir/TrainEnv/Map_<i>.pdf; it is currently disabled.

# Generate the held-out test environments.
TestEnvNum = 50
env = TfEnv(GridBase(params))
# Ask the wrapped grid env to sample a fresh map and a fresh
# belief/start/goal pair on every reset() — TODO confirm against GridBase.
env._wrapped_env.generate_grid = True
env._wrapped_env.generate_b0_start_goal = True
for i in range(TestEnvNum):
    env.reset()  # draw a new random environment for this index
    params = dict(env=env, )
    # Persist the sampled environment and a rendering of its map.
    joblib.dump(params, f"{log_dir}/TestEnv2/env_{i}.pkl")
    plot_env(env, save=True, path=f"{log_dir}/TestEnv2/Map_{i}.pdf")
# --- Derived parameters ---
# Assumes `params` is a dict of grid-world settings defined elsewhere in the
# file (grid_n, grid_m, observe_directions, num_action, stayaction, R_step,
# R_stay) — TODO confirm; the definition is not in this chunk.
params['obs_len'] = len(params['observe_directions'])
params['num_state'] = params['grid_n'] * params['grid_m']
# Episode length cap: 4x the number of grid cells.
# (Commented alternative in the original: 4 * (grid_n + grid_m).)
params['traj_limit'] = 4 * (params['grid_n'] * params['grid_m']
                            )  # 4 * (params['grid_n'] + params['grid_m'])
# Expand the scalar step reward into one entry per action, then override the
# "stay" action's entry with the stay reward.
params['R_step'] = [params['R_step']] * params['num_action']
params['R_step'][params['stayaction']] = params['R_stay']

# --- Restore a previously generated environment ---
# NOTE(review): joblib.load executes pickled code — only load trusted files.
env_ref = joblib.load('./env.pkl')['env']
grid = env_ref._wrapped_env.grid
b0 = env_ref._wrapped_env.b0
start_state = env_ref._wrapped_env.start_state
goal_state = env_ref._wrapped_env.goal_state
# Rebuild the env from the saved map/belief, and disable regeneration so
# reset() presumably keeps the loaded layout — verify in GridBase.
env = TfEnv(
    GridBase(params, grid=grid, b0=b0, start_state=start_state,
             goal_state=goal_state))
env._wrapped_env.generate_grid = False
env._wrapped_env.generate_b0_start_goal = False
env.reset()

# --- Logging setup ---
log_dir = "./Data/obs_1goal20step0stay_1_gru"
tabular_log_file = osp.join(log_dir, "progress.csv")
text_log_file = osp.join(log_dir, "debug.log")
params_log_file = osp.join(log_dir, "params.json")
pkl_file = osp.join(log_dir, "params.pkl")
logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
# --- Imports ---
# NOTE(review): this chunk appears after code that already uses TfEnv/joblib,
# so the file seems to be stored out of order. GridBase is not imported in
# the visible portion — presumably imported elsewhere; verify.
from rllab.misc.instrument import stub, run_experiment_lite
from qmdp_policy import QMDPPolicy
from sandbox.rocky.tf.policies.categorical_gru_policy import CategoricalGRUPolicy
import lasagne.nonlinearities as NL
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc import logger
import os.path as osp
import tensorflow as tf
from sandbox.rocky.tf.samplers.batch_sampler import BatchSampler
import joblib
import dill
import numpy as np

# stub(globals())  # rllab experiment stubbing is disabled

# Sample one random grid environment: generate a map and an initial
# belief/start/goal once via reset(), then clear the generation flags —
# presumably so later resets reuse the sampled layout (confirm in GridBase).
env = TfEnv(GridBase())
env._wrapped_env.generate_grid = True
env._wrapped_env.generate_b0_start_goal = True
env.reset()
env._wrapped_env.generate_grid = False
env._wrapped_env.generate_b0_start_goal = False
# Grab the wrapped env's cached renderings/state; the attribute names
# suggest images of the map, goal, and initial belief — TODO confirm.
env_img = env._wrapped_env.env_img
goal_img = env._wrapped_env.goal_img
b0_img = env._wrapped_env.b0_img
start_state = env._wrapped_env.start_state
params = dict(env=env, )
# The env was previously persisted with dill:
# file = open('env.pkl', 'wb')
# dill.dump(params, file)
# with open('env.pkl', 'rb') as file: