def test_gray_scale_observation(env_id, keep_dim):
    """GrayScaleObservation should match ALE's native grayscale within one shade."""
    ale_gray = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
    ale_rgb = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
    wrapped_env = GrayScaleObservation(ale_rgb, keep_dim=keep_dim)
    assert ale_rgb.observation_space.shape[-1] == 3

    seed = 0
    ale_gray.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = ale_gray.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        # Channel axis kept as a trailing singleton dimension.
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE gray scale is slightly different, but no more than by one shade
    assert np.allclose(gray_obs.astype('int32'), wrapped_obs.astype('int32'), atol=1)
def make_atari(env_id, max_episode_steps=None):
    """Build a NoFrameskip Atari env with noop resets and 4-frame max-and-skip.

    An optional TimeLimit wrapper caps episode length when
    `max_episode_steps` is given.
    """
    base = gym.make(env_id)
    # Frame skipping is done by MaxAndSkipEnv, so the raw env must not skip.
    assert 'NoFrameskip' in base.spec.id
    wrapped = MaxAndSkipEnv(NoopResetEnv(base, noop_max=30), skip=4)
    if max_episode_steps is None:
        return wrapped
    return TimeLimit(wrapped, max_episode_steps=max_episode_steps)
def test_text_envs():
    """Smoke-test VideoRecorder on a text-rendered (ANSI) environment."""
    text_env = gym.make('FrozenLake-v0')
    recorder = VideoRecorder(text_env)
    try:
        text_env.reset()
        recorder.capture_frame()
        recorder.close()
    finally:
        # Always delete the recording file, even if the test fails.
        os.remove(recorder.path)
def test_flatten_observation(env_id):
    """FlattenObservation should turn a 3-D observation into a 1-D vector."""
    base_env = gym.make(env_id)
    flat_env = FlattenObservation(base_env)

    base_obs = base_env.reset()
    flat_obs = flat_env.reset()

    assert len(base_obs.shape) == 3
    assert len(flat_obs.shape) == 1
    # The flat vector holds exactly one entry per original element.
    expected_len = base_obs.shape[0] * base_obs.shape[1] * base_obs.shape[2]
    assert flat_obs.shape[0] == expected_len
def test_record_simple():
    """Record one CartPole frame and verify a non-trivial video file exists."""
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # Fix: the original did `f = open(rec.path)` (text mode, on a binary
    # video file) and never closed the handle. stat() gives the size
    # without opening the file at all.
    assert os.stat(rec.path).st_size > 100
def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    raw_env = gym.make(env_id)
    # Flatten the goal-conditioned dict observation into a single vector.
    flat_env = FlattenDictWrapper(raw_env, ['observation', 'desired_goal'])
    log_dir = logger.get_dir()
    monitor_path = log_dir and os.path.join(log_dir, str(rank))
    monitored = Monitor(flat_env, monitor_path, info_keywords=('is_success',))
    monitored.seed(seed)
    return monitored
def test_resize_observation(env_id, shape):
    """ResizeObservation should rescale frames to `shape`, keeping 3 channels."""
    env = ResizeObservation(gym.make(env_id), shape)
    assert env.observation_space.shape[-1] == 3

    obs = env.reset()
    # An int means a square target; a sequence gives (height, width) directly.
    expected_hw = (shape, shape) if isinstance(shape, int) else tuple(shape)
    assert env.observation_space.shape[:2] == expected_hw
    assert obs.shape == expected_hw + (3,)
def main(**kwargs):
    """Train and evaluate a FeatureNet on pre-collected policy rollouts.

    Expects kwargs: seed, env, obs_type, fixed_num_of_contact, prediction,
    process_type, feature_dim, feature_layer, and optionally gpu_frac.
    Logs per-iteration train/test metrics under data/<EXP_NAME>/<seed>.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # Fix: json.dump(kwargs, open(...)) leaked the file handle.
    with open(exp_dir + '/params.json', 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default():
        folder = './data/policy/' + kwargs['env']
        # Fix: pickle.load(open(...)) leaked the file handle.
        with open(folder + '/paths.pickle', 'rb') as f:
            paths = pickle.load(f)
        niters = paths.get_current_episode_size() // 100
        train_data, test_data = split_data(paths, niters)
        dimo = train_data[0]['o'].shape[-1]
        dims = [dimo]
        env = gym.make(kwargs['env'],
                       obs_type=kwargs['obs_type'],
                       fixed_num_of_contact=kwargs['fixed_num_of_contact'])
        feature_net = FeatureNet(dims,
                                 fixed_num_of_contact=kwargs['fixed_num_of_contact'],
                                 contact_dim=env.contact_dim,
                                 sess=sess,
                                 output=kwargs['prediction'],
                                 process_type=kwargs['process_type'],
                                 feature_dim=kwargs['feature_dim'],
                                 feature_layer=kwargs['feature_layer'])
        sess.run(tf.global_variables_initializer())
        for i in range(niters):
            start = timer.time()
            feature_net.train(train_data[i])
            feature_net.test(test_data[i])
            logger.logkv("iter", i)
            logger.logkv("iter_time", timer.time() - start)
            logger.dumpkvs()
            if i == 0:
                # Freeze the graph after the first step to catch accidental
                # op creation inside the training loop.
                sess.graph.finalize()
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    # Offset the global seed per MPI rank so workers get distinct RNG streams.
    worker_seed = None if seed is None else seed + 1000 * rank
    set_global_seeds(worker_seed)
    env = gym.make(env_id)

    log_dir = logger.get_dir()
    monitor_path = os.path.join(log_dir, str(rank)) if log_dir is not None else None
    env = Monitor(env, monitor_path, allow_early_resets=True)
    env.seed(seed)

    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def test_transform_reward(env_id):
    """TransformReward must apply the given function to every step reward."""
    # use case #1: scale
    scales = [0.1, 200]
    for scale in scales:
        env = gym.make(env_id)
        wrapped_env = TransformReward(gym.make(env_id), lambda r: scale * r)
        action = env.action_space.sample()

        env.seed(0)
        env.reset()
        wrapped_env.seed(0)
        wrapped_env.reset()

        _, reward, _, _ = env.step(action)
        _, wrapped_reward, _, _ = wrapped_env.step(action)

        assert wrapped_reward == scale * reward
        del env, wrapped_env

    # use case #2: clip
    min_r = -0.0005
    max_r = 0.0002
    env = gym.make(env_id)
    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
    action = env.action_space.sample()

    env.seed(0)
    env.reset()
    wrapped_env.seed(0)
    wrapped_env.reset()

    _, reward, _, _ = env.step(action)
    _, wrapped_reward, _, _ = wrapped_env.step(action)

    assert abs(wrapped_reward) < abs(reward)
    # Fix: compare against the named bounds instead of repeating the magic
    # numbers (-0.0005 / 0.0002 had to be kept in sync with min_r/max_r by hand).
    assert wrapped_reward == min_r or wrapped_reward == max_r
    del env, wrapped_env

    # use case #3: sign
    env = gym.make(env_id)
    wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))

    env.seed(0)
    env.reset()
    wrapped_env.seed(0)
    wrapped_env.reset()

    for _ in range(1000):
        action = env.action_space.sample()
        _, wrapped_reward, done, _ = wrapped_env.step(action)
        assert wrapped_reward in [-1.0, 0.0, 1.0]
        if done:
            break
    del env, wrapped_env
def make_env(subrank=None, obs_type='original', fixed_num_of_contact=0):
    """Construct the module-level `env_name` env, Monitor-wrapped per MPI worker.

    When `subrank` is given and a logger directory exists, the env is wrapped
    in a Monitor whose log file is keyed by `<mpi_rank>.<subrank>`.
    """
    env = gym.make(env_name, obs_type=obs_type,
                   fixed_num_of_contact=fixed_num_of_contact)
    if subrank is not None and logger.get_dir() is not None:
        try:
            from mpi4py import MPI
            mpi_rank = MPI.COMM_WORLD.Get_rank()
        except ImportError:
            MPI = None
            mpi_rank = 0
            # Fix: typo "publshed" -> "published" in the warning text.
            logger.warn('Running with a single MPI process. This should work, but the results may differ from the ones published in Plappert et al.')
        max_episode_steps = env._max_episode_steps
        env = Monitor(env,
                      os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                      allow_early_resets=True)
        # hack to re-expose _max_episode_steps (ideally should replace reliance on it downstream)
        env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
def test_frame_stack(env_id, num_stack, lz4_compress):
    """FrameStack should prepend a stack axis and shift frames on step()."""
    base = gym.make(env_id)
    frame_shape = base.observation_space.shape
    env = FrameStack(base, num_stack, lz4_compress)
    stacked_shape = (num_stack,) + frame_shape
    assert env.observation_space.shape == stacked_shape

    obs = np.asarray(env.reset())
    assert obs.shape == stacked_shape
    # Right after reset, every slot holds the same initial frame.
    for i in range(1, num_stack):
        assert np.allclose(obs[i - 1], obs[i])

    obs, _, _, _ = env.step(env.action_space.sample())
    obs = np.asarray(obs)
    assert obs.shape == stacked_shape
    # After one step only the newest slot differs.
    for i in range(1, num_stack - 1):
        assert np.allclose(obs[i - 1], obs[i])
    assert not np.allclose(obs[-1], obs[-2])
def test_clip_action():
    """ClipAction(env).step(a) must match env.step(clip(a, low, high))."""
    # mountaincar: action-based rewards
    make_env = lambda: gym.make('MountainCarContinuous-v0')
    raw_env = make_env()
    clipped_env = ClipAction(make_env())

    seed = 0
    raw_env.seed(seed)
    clipped_env.seed(seed)
    raw_env.reset()
    clipped_env.reset()

    # Mix of in-range and out-of-range actions.
    for action in ([.4], [1.2], [-0.3], [0.0], [-2.5]):
        bounded = np.clip(action, raw_env.action_space.low, raw_env.action_space.high)
        obs_raw, rew_raw, done_raw, _ = raw_env.step(bounded)
        obs_clip, rew_clip, done_clip, _ = clipped_env.step(action)
        assert np.allclose(rew_raw, rew_clip)
        assert np.allclose(obs_raw, obs_clip)
        assert done_raw == done_clip
def main(**kwargs):
    """Expand a recorded contact dataset and dump flattened observations.

    Loads (obs, actions, fixed_num_of_contact) from a pickled rollout file,
    expands per-contact data with geom information, merges the leading
    (episode, step) axes into one sample axis, and re-pickles the result.
    """
    # NOTE: a large commented-out block that merged several per-seed pickles
    # into one dataset used to live here; recover it from VCS history if needed.
    src = '../dataset/sequence/HandManipulateEgg-v0/seed1-dict.pickle'
    # Fix: close the input file handle (was pickle.load(open(...))).
    with open(src, 'rb') as f:
        o, a, fixed_num_of_contact = pickle.load(f)
    env = gym.make(kwargs['env'],
                   obs_type=kwargs['obs_type'],
                   fixed_num_of_contact=[fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(o, ngeoms, fixed_num_of_contact)

    dst = './dataset/HandManipulateEgg-v0/50000obs.pickle'
    # Flatten (episode, step, ...) -> (sample, ...).
    obs = obs.reshape((-1, *obs.shape[2:]))
    with open(dst, 'wb') as pickle_file:
        pickle.dump(obs, pickle_file)
def main(**kwargs):
    """Probe an env with random actions and report the max simultaneous contacts."""
    # Re-encode kwargs as argv so the shared CLI parser can process them.
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)
    params = args.__dict__

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    env = gym.make(kwargs['env'],
                   obs_type=kwargs['obs_type'],
                   fixed_num_of_contact=kwargs['fixed_num_of_contact'])
    global_largest = 0
    for _ in range(200):
        horizon = 100
        o = env.reset()
        d = False
        t = 0
        largest = 0
        # Fix: the original used `d is False`, an identity check that breaks
        # if the env returns a numpy bool; use truthiness instead.
        while t < horizon and not d:
            a = env.action_space.sample()
            largest = max(largest, env.contact_num)
            global_largest = max(global_largest, largest)
            o, r, d, _ = env.step(a)
            t = t + 1
        print("largest: ", largest)
        print("global_largest: ", global_largest)
    env.close()
def __init__(self, env_name):
    """Adapt a gym environment, caching dims/horizon and building an EnvSpec."""
    wrapped = gym.make(env_name)
    self.env = wrapped
    self.env_id = wrapped.spec.id
    self.sim = wrapped.env.sim
    self._horizon = wrapped.spec.timestep_limit

    inner = self.env.env
    try:
        self._action_dim = inner.action_dim
    except AttributeError:
        # Fall back to the flat dimension of the Box action space.
        self._action_dim = inner.action_space.shape[0]

    self._observation_dim = inner.obs_dim

    try:
        self._num_agents = inner.num_agents
    except AttributeError:
        # Single-agent unless the env says otherwise.
        self._num_agents = 1

    # Specs
    self.spec = EnvSpec(self._observation_dim, self._action_dim,
                        self._horizon, self._num_agents)
def main(**kwargs):
    """Roll out random actions, visualizing the feature net's pose prediction.

    For each step the predicted object pose (last 7 dims of the prediction)
    is rendered, then the true pose is restored and rendered for comparison.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    env = gym.make(kwargs['env'][0],
                   obs_type=kwargs['obs_type'][0],
                   fixed_num_of_contact=kwargs['fixed_num_of_contact'][0])
    num_episodes = 1
    horizon = 100
    # Fix: close the model file handle (was pickle.load(open(...))).
    with open('./saved/' + str(kwargs['env'][0]) + '-model.pickle', 'rb') as f:
        feature_net = pickle.load(f)

    for _ in range(num_episodes):
        o = env.reset()
        d = False
        t = 0
        # Fix: `d is False` is an identity check and fails for numpy bools.
        while t < horizon and not d:
            a = env.action_space.sample()
            o, r, d, _ = env.step(a)
            env.render()
            time.sleep(1)
            prediction = feature_net.predict_single(o['observation'])
            t = t + 1
            # Show the predicted pose...
            env.sim.data.set_joint_qpos('object:joint', prediction[-7:])
            env.sim.forward()
            env.render()
            time.sleep(1)
            # ...then restore the ground-truth pose.
            env.sim.data.set_joint_qpos('object:joint', o['observation'][-7:])
            env.sim.forward()
    env.close()
def main(**kwargs):
    """Load per-seed contact datasets, split them, and optionally visualize.

    All kwargs values are single-element lists (values accessed via [0]).
    In 'restore' mode a previously saved TF meta-graph is reloaded before
    the data pipeline runs.

    NOTE(review): `feature_net` is passed to visualize_data below but is not
    defined anywhere in this function — presumably it comes from the restored
    graph or module scope; confirm before relying on the visualize branches.
    """
    import dill as pickle
    from datetime import datetime
    exp_dir = os.getcwd() + '/data/feature_net/' + kwargs['input_label'][0] + kwargs['output_label'][0] + '/'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    mode = kwargs['mode'][0]
    visualize_training_data = kwargs['visualize_training_data'][0]
    visualize_testing_data = kwargs['visualize_testing_data'][0]
    visualize_new_data = kwargs['visualize_new_data'][0]
    if mode == 'restore':
        # Reload the checkpointed graph saved under exp_dir (suffix -999).
        saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()
    with sess.as_default() as sess:
        # folder = './data/policy/' + kwargs['env'][0]
        # buffer, fixed_num_of_contact = pickle.load(open('../saved/HandManipulateEgg-v0-fix9.pickle', 'rb'))
        # Merge several per-seed pickles into one buffer, concatenating
        # along the sample axis.
        buffer = {}
        name = 's1'
        paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
        for key in paths:
            buffer[key] = paths[key]
        for name in ['s2', 's4', 's5', 's6', 'soft3']:
            paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis = 0)
        env = gym.make(kwargs['env'][0], obs_type = kwargs['obs_type'][0], fixed_num_of_contact = fixed_num_of_contact)
        # Cap the dataset at 1M samples.
        for key in buffer:
            buffer[key] = buffer[key][:int(1e6)]
        niters = buffer['positions'].shape[0] // 100
        print("total iteration: ", niters)
        ngeoms = env.sim.model.ngeom
        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        start = time.time()
        # paths = expand_data(buffer, ngeoms, fixed_num_of_contact, input_label, output_label)
        # print("expand data:", time.time() - start)
        paths = buffer
        start = time.time()
        train_data, test_data, vis_data, vis_data_test = split_data(paths, niters)
        print("split data:", time.time() - start)
        # Keep only xyz (drop the quaternion) of the object pose for
        # train/test; the vis sets keep the full pose under a new key.
        train_data['object_position'] = train_data['object_position'][:, :, :3]
        vis_data['original_object_position'] = vis_data['object_position']
        vis_data_test['original_object_position'] = vis_data_test['object_position']
        test_data['object_position'] = test_data['object_position'][:, :, :3]
        # Feature dimension per label name, used to size the model I/O.
        labels_to_dims = {}
        labels_to_dims['contacts'] = 3+6+ngeoms
        labels_to_dims['positions'] = 3
        # labels_to_dims['object_position'] = 7
        labels_to_dims['object_position'] = 3
        labels_to_dims['joint_position'] = 24
        labels_to_dims['object_vel'] = 6
        labels_to_dims['joint_vel'] = 24
        labels_to_dims['geoms'] = ngeoms
        dims = (labels_to_dims[input_label], labels_to_dims[output_label])
        print("preparation done")
        num_episodes = 1
        horizon = 100
        if visualize_training_data:
            visualize_data(vis_data, env, fixed_num_of_contact, feature_net, mode, input_label)
        if visualize_testing_data:
            visualize_data(vis_data_test, env, fixed_num_of_contact, feature_net, mode, input_label)
def env_fn():
    """Return a CartPole-v0 instance seeded to 0 for reproducibility."""
    cartpole = gym.make('CartPole-v0')
    cartpole.seed(0)
    return cartpole
def main(**kwargs):
    """Train or restore a RotationModel on merged per-seed contact datasets.

    All kwargs values are single-element lists (values accessed via [0]).
    mode == 'train': initialize variables, train for `niters` batches, save.
    mode == 'restore': reload the saved graph and run a prediction pass.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    exp_dir = os.getcwd() + '/data/feature_net/' + kwargs['input_label'][0] + kwargs['output_label'][0] + '/'
    mode = kwargs['mode'][0]
    if mode == 'restore':
        # Reload the checkpointed graph saved under exp_dir (suffix -999)
        # BEFORE building the model so restored variables are found.
        rotation_saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        rotation_saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()
    with sess.as_default() as sess:
        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        # Merge 16 per-seed pickles into one buffer along the sample axis.
        buffer = {}
        name = '1'
        paths, fixed_num_of_contact = pickle.load(open('../saved/trained/SoftHandManipulateEgg-v080-' + name + '-dict.pickle', 'rb'))
        for key in paths:
            buffer[key] = paths[key]
        for name in [str(i) for i in range(2, 17)]:
            paths, fixed_num_of_contact = pickle.load(open('../saved/trained/SoftHandManipulateEgg-v080-' + name + '-dict.pickle', 'rb'))
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis=0)
        env = gym.make(kwargs['env'][0], obs_type=kwargs['obs_type'][0], fixed_num_of_contact=fixed_num_of_contact)
        batch_size = 100
        paths = data_filter(buffer, fixed_num_of_contact, batch_size)
        niters = paths['positions'].shape[0] // batch_size
        print("total iteration: ", niters)
        print("total number of data: ", paths['positions'].shape[0])
        train_data, test_data, _, _ = split_data(paths, niters)
        # Keep only xyz of the object pose (drop the quaternion part).
        train_data['object_position'] = train_data['object_position'][:, :, :3]
        test_data['object_position'] = test_data['object_position'][:, :, :3]
        labels_to_dims = {}
        labels_to_dims['positions'] = 3
        rotation_model = RotationModel(dims=[labels_to_dims[input_label]],
                                       sess=sess,
                                       fixed_num_of_contact=fixed_num_of_contact,
                                       feature_layers=kwargs['feature_layers'][0],
                                       output_layers=kwargs['output_layers'][0],
                                       learning_rate=kwargs['learning_rate'][0])
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            for i in range(niters):
                input, out = train_data[input_label][i], train_data[output_label][i]
                pred = rotation_model.train(input, out)
                logger.logkv("iter", i)
                logger.dumpkvs()
            # Save under the -999 tag expected by the restore branch.
            rotation_model.save_model(exp_dir, 999)
        if mode == 'restore':
            rotation_model.restore()
            for i in range(1):
                logger.logkv("iter", i)
                _, _ = rotation_model.restore_predict(train_data[input_label][i], train_data[output_label][i])
                logger.dumpkvs()
def main(**kwargs):
    """Train, restore, or export a CPC encoder/transition model plus decoder.

    mode == 'train': train CPC for `cpc_epochs`, then a Decoder on top for
    `decoder_epochs`, saving under save_dir with tag -999.
    mode == 'restore': reload the saved graph and train only the decoder.
    mode == 'store_weights': dump trainable-variable values and model params
    as pickles for later reconstruction without the TF graph.
    """
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']
    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']
    # Record a human-readable dataset tag for the params.json dump.
    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    obs, acts, fixed_num_of_contact = pickle.load(open(dataset, 'rb'))
    env = gym.make(kwargs['env'], obs_type = kwargs['obs_type'], fixed_num_of_contact = [fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)
    # Build (o_t, a_t, o_{t+1}) pairs by shifting along the time axis.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape
    obs_dim = (fixed_num_of_contact, contact_point_dim)
    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    batch_size = 2
    if mode in ['restore', 'store_weights']:
        # Reload the graph saved with tag -999; the checkpoint itself lives
        # in save_dir stripped of its '/trained*' suffix (8 chars).
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()
    with sess.as_default() as sess:
        encoder = Encoder(z_dim, fixed_num_of_contact, contact_point_dim, feature_dims)
        trans = Transition(z_dim, action_dim, mode = trans_mode)
        # `type` selects the CPC variant from the label (1 for cpc1, 2 for cpc2).
        cpc = CPC(sess, encoder, trans, encoder_lr, fixed_num_of_contact,
                  contact_point_dim, action_dim,
                  include_action = include_action,
                  type = 1*(label=='cpc1') + 2*(label=='cpc2'),
                  n_neg = n,
                  process_type = process_type,
                  mode = mode)
        cpc_epochs, decoder_epochs = epochs
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            # NOTE(review): the train_cpc call is commented out, so this
            # branch currently only evaluates — confirm this is intended.
            for epoch in range(cpc_epochs):
                # train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)
            """decoder"""
            logger.log("Done with cpc training.")
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims, fixed_num_of_contact, contact_point_dim, decoder_lr)
            # Initialize only the decoder's (still-uninitialized) variables,
            # leaving the trained CPC weights untouched.
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)
        elif mode == 'restore':
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims, fixed_num_of_contact, contact_point_dim, decoder_lr)
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            print("logging to", exp_dir)
        elif mode == 'store_weights':
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' + str(label) + '/' + str(process_type)+ '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)
            # NOTE(review): save_dir here has no trailing '/', so the params
            # file is written as '...trainedparams.pickle' — confirm whether
            # that is intentional.
            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + 'params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim, encoder_lr, feature_dims, trans_mode, label, include_action], pickle_file)
            tf.reset_default_graph()
            print("graph reset successfully")
def env_fn():
    """Return a zero-argument factory for PongNoFrameskip-v4."""
    def _thunk():
        return gym.make('PongNoFrameskip-v4')
    return _thunk
def make_env(
    env_id,
    env_type,
    mpi_rank=0,
    subrank=0,
    seed=None,
    reward_scale=1.0,
    gamestate=None,
    flatten_dict_observations=True,
    wrapper_kwargs=None,
    env_kwargs=None,
    logger_dir=None,
    initializer=None,
    obs_type='original',
    fixed_num_of_contact=0,
):
    """Build a monitored, wrapped env for one (mpi_rank, subrank) worker.

    Dispatches on env_type ('atari', 'retro', or anything else -> plain gym),
    optionally flattens Dict observations, seeds per-subrank, attaches a
    Monitor, applies type-specific deepmind wrappers, clips Box actions,
    and scales rewards when reward_scale != 1.
    """
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)
    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    # 'module:EnvId' form: import the module so its envs get registered.
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        # Contact-observation envs take these two extra constructor kwargs.
        env_kwargs['obs_type'] = obs_type
        env_kwargs['fixed_num_of_contact'] = fixed_num_of_contact
        env = gym.make(env_id, **env_kwargs)
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))
    # Distinct seed per subrank so vectorized workers diverge.
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)
    return env
def main(**kwargs):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    """Generate rollout paths with a saved policy and pickle the processed data.

    kwargs are re-encoded as argv for the shared CLI parser; a descriptive
    `label` is derived from obs_type for the params.json dump.
    """
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    buffer_size = int(kwargs['buffer_size'])
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)
    params = args.__dict__
    import copy
    # Deep-copy so the label edit below doesn't mutate args.__dict__.
    params = copy.deepcopy(params)
    if args.obs_type == 'object':
        params['label'] = args.obs_type
    elif args.obs_type == 'original':
        params['label'] = 'object+joint'
    elif args.obs_type == 'contact':
        params['label'] = 'object+contact(' + args.process_type + ')'
    elif args.obs_type == 'full_contact':
        params['label'] = 'object+joint+contact(' + args.process_type + ')'
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(params, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    folder = './data/policy/' + str(args.env)
    obs_type = params['obs_type']
    fixed_num_of_contact = params['fixed_num_of_contact']
    env = gym.make(params['env'], obs_type=obs_type, fixed_num_of_contact=fixed_num_of_contact)
    # NOTE(review): str(args.env)[4:] strips a 4-char prefix from the env id
    # to locate the policy folder — confirm the expected naming scheme.
    policy = pickle.load(open('./data/policy/' + str(args.env)[4:] + '/policy.pickle', 'rb'))
    T = env._max_episode_steps
    paths = generate_paths(policy, T, obs_type, params['env'], fixed_num_of_contact, build_env(args), contact_dim=env.contact_dim, buffer_size=buffer_size)
    paths = process_episode(paths.all_samples(), env.contact_dim, fixed_num_of_contact)
    folder = '../saved/trained/' + str(args.env) + str(fixed_num_of_contact)
    with open(folder + '-18-dict.pickle', 'wb') as pickle_file:
        pickle.dump([paths, fixed_num_of_contact], pickle_file)
def make_fn():
    """Construct and return a fresh PongNoFrameskip-v4 environment."""
    return gym.make('PongNoFrameskip-v4')
def main(**kwargs):
    """Train a torch CPC model (encoder + transition), then a pose decoder.

    All kwargs values are single-element lists (values accessed via [0]).
    Data comes from a pickled 5-seed rollout; models run on CUDA.
    """
    exp_dir = os.getcwd() + '/cpc_model/' + kwargs['process_type'][0] + '/n200-8'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    obs, acts, fixed_num_of_contact = pickle.load(open('../untrained/HandManipulateEgg-v0/5seeds-dict.pickle', 'rb'))
    include_action = kwargs['include_action'][0]
    env = gym.make(kwargs['env'][0], obs_type=kwargs['obs_type'][0], fixed_num_of_contact=[fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    # Build (o_t, a_t, o_{t+1}) pairs by shifting along the time axis.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape
    obs_dim = (fixed_num_of_contact, contact_point_dim)
    # Hyperparameters (n = negative samples, k = prediction offset).
    z_dim = 8
    lr = 1e-3
    epochs = 100
    batch_size = 2
    n = 200
    k = 1
    encoder = Encoder(z_dim, obs_dim[1], fixed_num_of_contact).cuda()
    # Action-conditioned transition only when include_action is set.
    if include_action:
        trans = Transition(z_dim, action_dim).cuda()
    else:
        trans = Transition(z_dim, 0).cuda()
    decoder = Decoder(z_dim, 3).cuda()
    optim_cpc = optim.Adam(list(encoder.parameters()) + list(trans.parameters()), lr=lr)
    optim_dec = optim.Adam(decoder.parameters(), lr=lr)
    train_data, test_data = split_data([obs, acts, next_obs])
    for epoch in range(epochs):
        train_cpc(encoder, trans, optim_cpc, epoch, train_data, batch_size, n, k, include_action)
        test_cpc(encoder, trans, epoch, test_data, batch_size, n, k, include_action)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()
    # Re-split including object_info so the decoder has pose targets.
    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    for epoch in range(100):
        train_decoder(decoder, encoder, optim_dec, epoch, train_data, batch_size, include_action, n, k=1)
        test_decoder(decoder, encoder, epoch, test_data, batch_size, include_action, n, k=1)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()
def main(**kwargs):
    """Visualize contact-pose data: replay saved predictions or roll out randomly.

    All kwargs values are single-element lists (values accessed via [0]).
    mode == 'load': replay saved (obs, predictions) pairs, rendering the true
    state and then the predicted object position.
    otherwise: roll out random actions and render.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    env = gym.make(kwargs['env'][0], obs_type=kwargs['obs_type'][0], fixed_num_of_contact=[kwargs['fixed_num_of_contact'][0], False])
    num_episodes = 100
    horizon = 100
    # NOTE(review): horizon is immediately overwritten to 1 — likely a
    # leftover debug setting; confirm intent.
    horizon = 1
    fixed_num_of_contact = kwargs['fixed_num_of_contact'][0]
    mode = kwargs['mode'][0]
    if mode == 'load':
        epoch = 10
        obs, predictions = pickle.load(open('./dataset/supervised-saved/' + str(epoch) + '.pickle', 'rb'))
        contact_num = fixed_num_of_contact
        B, D = obs.shape
        # Observation layout: leading env.contact_dim entries are per-contact
        # features; the remainder is object/joint state.
        contact_info = obs[:, :env.contact_dim].reshape((B, fixed_num_of_contact, -1))
        object_info = obs[:, env.contact_dim:]
        for contact, o, pred in zip(contact_info, object_info, predictions):
            # env.sim.data.set_joint_qpos('target:joint', o[-7:]+np.ones(7))
            # Slices below assume: [0:24] joint pos, [24:48] joint vel,
            # [48:54] object vel, [-7:] object pose (xyz + quaternion).
            object_position = o[-7:]
            object_vel = o[48:48 + 6]
            joint_position = o[:24]
            joint_vel = o[24:48]
            env.sim.data.set_joint_qpos('object:joint', object_position)
            env.sim.data.set_joint_qvel('object:joint', object_vel)
            for idx in range(len(env.sim.model.joint_names)):
                name = env.sim.model.joint_names[idx]
                if name.startswith('robot'):
                    env.sim.data.set_joint_qpos(name, joint_position[idx])
                    env.sim.data.set_joint_qvel(name, joint_vel[idx])
            # Compare predicted vs true xyz (pose minus the 4 quaternion dims).
            pos = object_position[:-4]
            num_points = (np.sum(contact, axis=1) != 0).sum()
            if num_points != 0:
                print(((pos - pred)**2).sum(), np.abs(pos - pred).sum(), num_points)
            else:
                print(((pos - pred)**2).sum(), np.abs(pos - pred).sum(), pos)
            env.render()
            time.sleep(0.2)
            # Teleport the object to the predicted xyz (keeping true rotation).
            env.sim.data.set_joint_qpos('object:joint', np.concatenate((pred, object_position[-4:]), axis=-1))
            env.render()
            time.sleep(1)
        env.close()
        # # # note: 46 is object
        # for contact_idx in range(contact_num):
        #     site_name = 'contact{}'.format(contact_idx+1)
        #     site_id = env.sim.model.site_name2id(site_name)
        #     env.sim.model.site_pos[site_id] = contact_info[contact_idx][-9:-6]
        #     env.sim.forward()
        #     time.sleep(1)
        #     env.render()
        # st()
    else:
        for _ in range(num_episodes):
            o = env.reset()
            d = False
            t = 0
            # NOTE(review): `d is False` is an identity test; it fails to stop
            # the loop if the env returns a numpy bool — confirm and fix.
            while t < horizon and d is False:
                a = env.action_space.sample()
                o, r, d, _ = env.step(a)
                env.render()
                t = t + 1
                # contacts = o['observation'][:env.contact_dim].reshape((fixed_num_of_contact, -1))
                # # env.sim.data.set_joint_qpos('target:joint', o['observation'][-7:]+np.ones(7))
                # contact_num = env.contact_num
                # # for idx in range(env.sim.model.ngeom):
                # #     print(idx, env.sim.model.geom_id2name(idx))
                # # note: 46 is object
                # print(contact_num)
                # for contact_idx in range(contact_num):
                #     site_name = 'contact{}'.format(contact_idx+1)
                #     site_id = env.sim.model.site_name2id(site_name)
                #     env.sim.model.site_pos[site_id] = contacts[contact_idx][-9:-6]
                #     env.sim.forward()
                #     env.render()
                #     time.sleep(1)
                # for contact_idx in range(contact_num):
                #     site_name = 'contact{}'.format(contact_idx+1)
                #     site_id = env.sim.model.site_name2id(site_name)
                #     env.sim.model.site_pos[site_id] = np.array([1, 0.9, 0.25])
                #     env.sim.forward()
                env.render()
                time.sleep(0.1)
    env.close()