def main(args):
    """Parse CLI args, configure the experiment logger/output dir, train, and save the policy.

    Returns the trained model. `args` is the raw argv-style list handed to the arg parser.
    """
    # NOTE(review): the original comment said logging should be disabled in child MPI
    # processes (rank > 0), but both branches configured identical loggers and dumped
    # params identically. The duplicated branches are collapsed here without changing
    # behavior; if child-rank silencing is actually wanted, pass format_strs=[] for rank > 0.
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Pick the experiment directory from the CLI flags (debug > explicit log_path > default).
    if args.debug:
        exp_dir = os.getcwd() + '/data/debug'
    elif args.log_path:
        exp_dir = (os.getcwd() + '/data/' + args.env + '/' + args.log_path + '-'
                   + args.obs_type + '-' + args.process_type + '-' + str(args.feature_dim))
    else:
        exp_dir = (os.getcwd() + '/data/' + args.env + '/' + args.obs_type + '-'
                   + args.process_type + '-' + str(args.feature_dim))

    rank = 0 if MPI is None else MPI.COMM_WORLD.Get_rank()
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='gap', snapshot_gap=5)
    # Context manager closes the params file even if json.dump raises
    # (the original leaked the handle returned by open()).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(args.__dict__, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    model, env = train(args, extra_args)

    save_path = osp.expanduser(exp_dir) + '/policy.pickle'
    print(save_path)
    model.save(save_path)
    return model
def main(**kwargs):
    """Train a FeatureNet on pre-collected policy rollouts.

    Loads `paths.pickle` for kwargs['env'], splits it into per-iteration
    train/test chunks, and logs per-iteration timing via the project logger.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # with-blocks close the params/paths files promptly (original leaked both handles).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:
        folder = './data/policy/' + kwargs['env']
        with open(folder + '/paths.pickle', 'rb') as paths_file:
            paths = pickle.load(paths_file)
        # 100 episodes per training iteration.
        niters = paths.get_current_episode_size() // 100
        train_data, test_data = split_data(paths, niters)
        dimo = train_data[0]['o'].shape[-1]
        dims = [dimo]
        env = gym.make(kwargs['env'],
                       obs_type=kwargs['obs_type'],
                       fixed_num_of_contact=kwargs['fixed_num_of_contact'])
        feature_net = FeatureNet(dims,
                                 fixed_num_of_contact=kwargs['fixed_num_of_contact'],
                                 contact_dim=env.contact_dim,
                                 sess=sess,
                                 output=kwargs['prediction'],
                                 process_type=kwargs['process_type'],
                                 feature_dim=kwargs['feature_dim'],
                                 feature_layer=kwargs['feature_layer'])
        sess.run(tf.global_variables_initializer())
        for i in range(niters):
            start = timer.time()
            feature_net.train(train_data[i])
            feature_net.test(test_data[i])
            logger.logkv("iter", i)
            logger.logkv("iter_time", timer.time() - start)
            logger.dumpkvs()
            # Freeze the graph after the first iteration so later iterations
            # cannot silently add ops (a common TF1 memory-leak source).
            if i == 0:
                sess.graph.finalize()
def run_experiment(**kwargs):
    """Configure logging/params dump and set up a TF session, seed, baseline, and env.

    NOTE(review): the visible body ends after constructing the env — this variant
    appears unused/truncated; verify against callers before relying on it.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # Close the params file deterministically (original leaked the open() handle).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:
        # Instantiate classes
        set_seed(kwargs['seed'])
        baseline = kwargs['baseline']()
        # env = normalize(kwargs['env']())
        env = GymEnv(kwargs['env'])
def main(**kwargs):
    """Load soft-contact datasets, prepare train/test/vis splits, and optionally visualize.

    Every kwargs value is a single-element list (hence the `[0]` indexing throughout).
    """
    import dill as pickle
    from datetime import datetime
    exp_dir = (os.getcwd() + '/data/feature_net/'
               + kwargs['input_label'][0] + kwargs['output_label'][0] + '/')
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # with-blocks close every file promptly (the original leaked each open() handle).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    mode = kwargs['mode'][0]
    visualize_training_data = kwargs['visualize_training_data'][0]
    visualize_testing_data = kwargs['visualize_testing_data'][0]
    visualize_new_data = kwargs['visualize_new_data'][0]

    if mode == 'restore':
        # Restore a previously trained graph from the experiment directory.
        saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        # Merge several recorded soft-contact datasets into one buffer, key by key.
        buffer = {}
        name = 's1'
        with open('../saved/soft/' + name + '80-dict.pickle', 'rb') as f:
            paths, fixed_num_of_contact = pickle.load(f)
        for key in paths:
            buffer[key] = paths[key]
        for name in ['s2', 's4', 's5', 's6', 'soft3']:
            with open('../saved/soft/' + name + '80-dict.pickle', 'rb') as f:
                paths, fixed_num_of_contact = pickle.load(f)
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis=0)

        env = gym.make(kwargs['env'][0],
                       obs_type=kwargs['obs_type'][0],
                       fixed_num_of_contact=fixed_num_of_contact)
        # Cap the dataset at 1M samples per key.
        for key in buffer:
            buffer[key] = buffer[key][:int(1e6)]
        niters = buffer['positions'].shape[0] // 100
        print("total iteration: ", niters)
        ngeoms = env.sim.model.ngeom
        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        start = time.time()
        # paths = expand_data(buffer, ngeoms, fixed_num_of_contact, input_label, output_label)
        # print("expand data:", time.time() - start)
        paths = buffer
        start = time.time()
        train_data, test_data, vis_data, vis_data_test = split_data(paths, niters)
        print("split data:", time.time() - start)
        # Keep only xyz of the object pose for train/test; preserve the full
        # pose for the visualization splits.
        train_data['object_position'] = train_data['object_position'][:, :, :3]
        vis_data['original_object_position'] = vis_data['object_position']
        vis_data_test['original_object_position'] = vis_data_test['object_position']
        test_data['object_position'] = test_data['object_position'][:, :, :3]

        # Per-label feature dimensionalities used to size the model I/O.
        labels_to_dims = {}
        labels_to_dims['contacts'] = 3 + 6 + ngeoms
        labels_to_dims['positions'] = 3
        # labels_to_dims['object_position'] = 7
        labels_to_dims['object_position'] = 3
        labels_to_dims['joint_position'] = 24
        labels_to_dims['object_vel'] = 6
        labels_to_dims['joint_vel'] = 24
        labels_to_dims['geoms'] = ngeoms
        dims = (labels_to_dims[input_label], labels_to_dims[output_label])
        print("preparation done")

        # NOTE(review): `feature_net` is referenced below but never defined in the
        # visible code (only built in other variants of main) — these calls will
        # raise NameError unless it is defined elsewhere; confirm before use.
        if visualize_training_data:
            visualize_data(vis_data, env, fixed_num_of_contact, feature_net, mode, input_label)
        if visualize_testing_data:
            visualize_data(vis_data_test, env, fixed_num_of_contact, feature_net, mode, input_label)
def main(**kwargs):
    """Roll out a saved policy to generate contact-rich paths and pickle them.

    kwargs are converted to argv-style flags so the shared arg parser can be reused.
    """
    import copy
    # Re-encode kwargs as '--key value' pairs for the shared CLI parser.
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    buffer_size = int(kwargs['buffer_size'])
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)
    # Deep-copy so mutating params never touches the parser's namespace dict.
    params = copy.deepcopy(args.__dict__)

    # Human-readable label describing which observation channels are used.
    if args.obs_type == 'object':
        params['label'] = args.obs_type
    elif args.obs_type == 'original':
        params['label'] = 'object+joint'
    elif args.obs_type == 'contact':
        params['label'] = 'object+contact(' + args.process_type + ')'
    elif args.obs_type == 'full_contact':
        params['label'] = 'object+joint+contact(' + args.process_type + ')'

    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # with-blocks close the params/policy files promptly (original leaked both handles).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(params, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    obs_type = params['obs_type']
    fixed_num_of_contact = params['fixed_num_of_contact']
    env = gym.make(params['env'], obs_type=obs_type, fixed_num_of_contact=fixed_num_of_contact)
    # str(args.env)[4:] strips a 4-char prefix from the env id to locate the
    # policy folder — presumably an id prefix like 'env:'; TODO confirm.
    with open('./data/policy/' + str(args.env)[4:] + '/policy.pickle', 'rb') as policy_file:
        policy = pickle.load(policy_file)

    T = env._max_episode_steps
    paths = generate_paths(policy, T, obs_type, params['env'], fixed_num_of_contact,
                           build_env(args), contact_dim=env.contact_dim,
                           buffer_size=buffer_size)
    paths = process_episode(paths.all_samples(), env.contact_dim, fixed_num_of_contact)
    folder = '../saved/trained/' + str(args.env) + str(fixed_num_of_contact)
    with open(folder + '-18-dict.pickle', 'wb') as pickle_file:
        pickle.dump([paths, fixed_num_of_contact], pickle_file)
def main(**kwargs):
    """Train a torch CPC encoder/transition pair, then a decoder on the frozen encoder.

    Every kwargs value is a single-element list (hence the `[0]` indexing).
    """
    exp_dir = os.getcwd() + '/cpc_model/' + kwargs['process_type'][0] + '/n200-8'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # with-blocks close the params/dataset files promptly (original leaked both handles).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)
    with open('../untrained/HandManipulateEgg-v0/5seeds-dict.pickle', 'rb') as data_file:
        obs, acts, fixed_num_of_contact = pickle.load(data_file)

    include_action = kwargs['include_action'][0]
    env = gym.make(kwargs['env'][0],
                   obs_type=kwargs['obs_type'][0],
                   fixed_num_of_contact=[fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    # Build (o_t, o_{t+1}) pairs by shifting the time axis.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape
    obs_dim = (fixed_num_of_contact, contact_point_dim)

    # Training hyperparameters (n = negatives per positive, k = prediction steps).
    z_dim = 8
    lr = 1e-3
    epochs = 100
    batch_size = 2
    n = 200
    k = 1

    encoder = Encoder(z_dim, obs_dim[1], fixed_num_of_contact).cuda()
    # Transition conditions on actions only when requested.
    if include_action:
        trans = Transition(z_dim, action_dim).cuda()
    else:
        trans = Transition(z_dim, 0).cuda()
    decoder = Decoder(z_dim, 3).cuda()
    optim_cpc = optim.Adam(list(encoder.parameters()) + list(trans.parameters()), lr=lr)
    optim_dec = optim.Adam(decoder.parameters(), lr=lr)

    # Phase 1: contrastive training of encoder + transition.
    train_data, test_data = split_data([obs, acts, next_obs])
    for epoch in range(epochs):
        train_cpc(encoder, trans, optim_cpc, epoch, train_data, batch_size, n, k, include_action)
        test_cpc(encoder, trans, epoch, test_data, batch_size, n, k, include_action)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()

    # Phase 2: supervised decoder on top of the learned representation.
    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    for epoch in range(100):
        train_decoder(decoder, encoder, optim_dec, epoch, train_data, batch_size,
                      include_action, n, k=1)
        test_decoder(decoder, encoder, epoch, test_data, batch_size,
                     include_action, n, k=1)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()
def main(**kwargs):
    """Train, restore, or export a TF CPC encoder+transition model and its decoder.

    mode == 'train': train CPC then decoder and checkpoint to save_dir.
    mode == 'restore': restore checkpoint and retrain only the decoder.
    mode == 'store_weights': restore checkpoint and pickle raw weights + hyperparams.
    """
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']
    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']

    # Map known dataset paths to readable names for the params dump.
    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'

    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type) + '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type) + '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained'

    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # with-blocks close the params/dataset files promptly (original leaked both handles).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    with open(dataset, 'rb') as data_file:
        obs, acts, fixed_num_of_contact = pickle.load(data_file)
    env = gym.make(kwargs['env'],
                   obs_type=kwargs['obs_type'],
                   fixed_num_of_contact=[fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)
    # Build (o_t, o_{t+1}) pairs by shifting the time axis.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape
    obs_dim = (fixed_num_of_contact, contact_point_dim)
    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    batch_size = 2

    if mode in ['restore', 'store_weights']:
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        # Strip the trailing '/trained' (8 chars) to get the checkpoint directory.
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        encoder = Encoder(z_dim, fixed_num_of_contact, contact_point_dim, feature_dims)
        trans = Transition(z_dim, action_dim, mode=trans_mode)
        cpc = CPC(sess, encoder, trans, encoder_lr, fixed_num_of_contact,
                  contact_point_dim, action_dim,
                  include_action=include_action,
                  type=1 * (label == 'cpc1') + 2 * (label == 'cpc2'),
                  n_neg=n,
                  process_type=process_type,
                  mode=mode)
        cpc_epochs, decoder_epochs = epochs
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            for epoch in range(cpc_epochs):
                # train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)
            """decoder"""
            logger.log("Done with cpc training.")
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims,
                              fixed_num_of_contact, contact_point_dim, decoder_lr)
            # Initialize only the decoder's (still-uninitialized) variables so the
            # trained CPC weights are left untouched.
            uninit_vars = [var for var in tf.global_variables()
                           if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)
        elif mode == 'restore':
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims,
                              fixed_num_of_contact, contact_point_dim, decoder_lr)
            uninit_vars = [var for var in tf.global_variables()
                           if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)
                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            print("logging to", exp_dir)
        elif mode == 'store_weights':
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' + str(label) + '/' + str(process_type) + '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)
            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + 'params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim,
                             encoder_lr, feature_dims, trans_mode, label, include_action],
                            pickle_file)
        tf.reset_default_graph()
        print("graph reset successfully")
def run_experiment(**kwargs):
    """Set up and run SAC training: env, twin Q-functions, policy, sampler, trainer."""
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # Close the params file deterministically (original leaked the open() handle).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:
        # Instantiate classes
        set_seed(kwargs['seed'])
        baseline = kwargs['baseline']()
        # env = normalize(kwargs['env']())
        # Re-encode kwargs as '--key value' pairs so the shared CLI parser can build the env.
        arg_list = []
        for key in kwargs.keys():
            arg_list.append('--' + key)
            arg_list.append(str(kwargs[key]))
        arg_parser = common_arg_parser()
        args, unknown_args = arg_parser.parse_known_args(arg_list)
        env = build_env(args)

        # Twin Q-functions and their targets (clipped double-Q, as in SAC).
        Qs = [
            ValueFunction(
                name="q_fun_%d" % i,
                obs_dim=int(np.prod(env.observation_space.shape)),
                action_dim=int(np.prod(env.action_space.shape)),
                hidden_nonlinearity=kwargs['vfun_hidden_nonlineariy'],
            ) for i in range(2)
        ]
        Q_targets = [
            ValueFunction(
                name="q_fun_target_%d" % i,
                obs_dim=int(np.prod(env.observation_space.shape)),
                action_dim=int(np.prod(env.action_space.shape)),
                hidden_nonlinearity=kwargs['vfun_hidden_nonlineariy'],
            ) for i in range(2)
        ]

        policy = GaussianMLPPolicy(
            name="policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['policy_hidden_sizes'],
            learn_std=kwargs['policy_learn_std'],
            output_nonlinearity=kwargs['policy_output_nonlinearity'],
            hidden_nonlinearity=kwargs['policy_hidden_nonlinearity'],
            squashed=True)

        sampler = BaseSampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
        )
        sample_processor = ModelSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
        )
        algo = SAC(policy=policy,
                   discount=kwargs['discount'],
                   learning_rate=kwargs['learning_rate'],
                   env=env,
                   Qs=Qs,
                   Q_targets=Q_targets,
                   reward_scale=kwargs['reward_scale'],
                   batch_size=kwargs['batch_size'])
        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            sampler=sampler,
            sample_processor=sample_processor,
            n_itr=kwargs['n_itr'],
            sess=sess,
        )
        trainer.train()
    # Bug fix: the original called sess.__exit__() with no arguments, which raises
    # TypeError (tf.Session.__exit__ takes exc_type/exc_value/exc_tb). close()
    # releases the session's resources as intended.
    sess.close()
def configure_logger(log_path, **kwargs):
    """Configure the global logger, preferring an explicit path over keyword options.

    When *log_path* is given it is passed positionally to ``logger.configure``;
    otherwise any keyword arguments are forwarded instead.
    """
    if log_path is None:
        logger.configure(**kwargs)
    else:
        logger.configure(log_path)