示例#1
0
def main(args):
    """Parse CLI args, configure the experiment logger, train, and save the policy.

    Args:
        args: raw command-line argument list, forwarded to ``common_arg_parser``.

    Returns:
        The trained model returned by ``train``.
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Pick the experiment directory: a fixed debug dir, or one derived from
    # env/observation/processing settings (optionally prefixed by log_path).
    if args.debug:
        exp_dir = os.getcwd() + '/data/debug'
    elif args.log_path:
        exp_dir = os.getcwd() + '/data/' + args.env + '/' + args.log_path + '-' + args.obs_type + '-' + args.process_type + '-' + str(args.feature_dim)
    else:
        exp_dir = os.getcwd() + '/data/' + args.env  + '/' + args.obs_type + '-' + args.process_type + '-' + str(args.feature_dim)

    # NOTE(review): the original comment said logging should be disabled in
    # child MPI processes (rank > 0), but both branches configured the logger
    # and dumped params identically; that (identical) behavior is preserved
    # here with the duplication removed.
    rank = 0 if MPI is None else MPI.COMM_WORLD.Get_rank()
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='gap', snapshot_gap=5)
    # Close the params file deterministically (was a bare open() leak).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(args.__dict__, params_file, indent=2, sort_keys=True, cls=ClassEncoder)

    model, env = train(args, extra_args)

    save_path = osp.expanduser(exp_dir) + '/policy.pickle'
    print(save_path)
    model.save(save_path)

    return model
示例#2
0
def main(**kwargs):
    """Train and evaluate a FeatureNet on pre-collected rollout paths.

    Sets up the experiment directory and logger, loads stored paths for the
    configured environment, splits them into per-iteration train/test chunks,
    then trains the feature network one chunk per iteration while logging
    per-iteration timing.

    Expected kwargs (from the visible reads): 'seed', 'env', 'obs_type',
    'fixed_num_of_contact', 'prediction', 'process_type', 'feature_dim',
    'feature_layer'; optional 'gpu_frac' (default 0.95).
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)

    with sess.as_default() as sess:
        folder = './data/policy/' + kwargs['env']
        paths = pickle.load(open(folder + '/paths.pickle', 'rb'))
        # One training iteration per 100 stored episodes.
        niters = paths.get_current_episode_size() // 100
        train_data, test_data = split_data(paths, niters)

        # Observation dimensionality, taken from the first training chunk.
        dimo = train_data[0]['o'].shape[-1]

        dims = [dimo]
        env = gym.make(kwargs['env'],
                       obs_type=kwargs['obs_type'],
                       fixed_num_of_contact=kwargs['fixed_num_of_contact'])

        feature_net = FeatureNet(
            dims,
            fixed_num_of_contact=kwargs['fixed_num_of_contact'],
            contact_dim=env.contact_dim,
            sess=sess,
            output=kwargs['prediction'],
            process_type=kwargs['process_type'],
            feature_dim=kwargs['feature_dim'],
            feature_layer=kwargs['feature_layer'])

        sess.run(tf.global_variables_initializer())
        for i in range(niters):
            start = timer.time()
            feature_net.train(train_data[i])
            feature_net.test(test_data[i])
            logger.logkv("iter", i)
            logger.logkv("iter_time", timer.time() - start)
            logger.dumpkvs()
            # Freeze the graph after the first iteration so accidental graph
            # growth in later iterations raises instead of leaking memory.
            if i == 0:
                sess.graph.finalize()
示例#3
0
def run_experiment(**kwargs):
    """Set up the experiment directory, logger and TF session, then build components.

    Creates the experiment dir, configures the logger, dumps kwargs to
    ``params.json``, opens a GPU-growth-enabled TF session, seeds RNGs, and
    instantiates the baseline and environment.

    Expected kwargs (from the visible reads): 'seed', 'baseline', 'env';
    optional 'gpu_frac' (default 0.95).
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # Close the params file deterministically (was a bare open() leak).
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(kwargs,
                  params_file,
                  indent=2,
                  sort_keys=True,
                  cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:

        # Instantiate classes
        set_seed(kwargs['seed'])

        baseline = kwargs['baseline']()

        # env = normalize(kwargs['env']())
        env = GymEnv(kwargs['env'])
示例#4
0
def main(**kwargs):
    """Load soft-contact datasets, prepare train/test/vis splits, and optionally visualize.

    Merges several pickled path dictionaries into one buffer, builds the env,
    truncates the buffer, derives input/output dimensionalities from the
    configured labels, and runs the requested visualizations.

    Note that kwargs values are indexed with ``[0]`` throughout, so each is
    expected to arrive as a one-element sequence.
    """
    import dill as pickle
    from datetime import datetime
    exp_dir = os.getcwd() + '/data/feature_net/' + kwargs['input_label'][0] + kwargs['output_label'][0] + '/'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    mode = kwargs['mode'][0]
    visualize_training_data = kwargs['visualize_training_data'][0]
    visualize_testing_data = kwargs['visualize_testing_data'][0]
    visualize_new_data = kwargs['visualize_new_data'][0]

    # In restore mode, load the latest checkpoint saved under exp_dir.
    if mode == 'restore':
        saver = tf.train.import_meta_graph(exp_dir + '-999.meta')
        saver.restore(sess, tf.train.latest_checkpoint(exp_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:

        # folder = './data/policy/' + kwargs['env'][0]
        # buffer, fixed_num_of_contact = pickle.load(open('../saved/HandManipulateEgg-v0-fix9.pickle', 'rb'))

        # Seed the buffer from the first dataset, then concatenate the rest
        # along axis 0 key by key.
        buffer = {}
        name = 's1'
        paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
        for key in paths:
            buffer[key] = paths[key]

        for name in ['s2', 's4', 's5', 's6', 'soft3']:
            paths, fixed_num_of_contact = pickle.load(open('../saved/soft/' + name + '80-dict.pickle', 'rb'))
            for key in paths:
                buffer[key] = np.concatenate([buffer[key], paths[key]], axis = 0)


        env = gym.make(kwargs['env'][0],
                       obs_type = kwargs['obs_type'][0],
                       fixed_num_of_contact = fixed_num_of_contact)

        # Cap every array at 1e6 samples to bound memory.
        for key in buffer:
            buffer[key] = buffer[key][:int(1e6)]


        # One iteration per 100 samples.
        niters = buffer['positions'].shape[0] // 100
        print("total iteration: ", niters)


        ngeoms = env.sim.model.ngeom
        input_label = kwargs['input_label'][0]
        output_label = kwargs['output_label'][0]
        start = time.time()
        # paths = expand_data(buffer, ngeoms, fixed_num_of_contact, input_label, output_label)
        # print("expand data:", time.time() - start)
        paths = buffer

        start = time.time()
        train_data, test_data, vis_data, vis_data_test = split_data(paths, niters)
        print("split data:", time.time() - start)

        # Keep only the xyz position (drop orientation) for train/test; the
        # vis splits retain the original values under a separate key.
        train_data['object_position'] = train_data['object_position'][:, :, :3]
        vis_data['original_object_position'] = vis_data['object_position']
        vis_data_test['original_object_position'] = vis_data_test['object_position']
        test_data['object_position'] = test_data['object_position'][:, :, :3]

        # Per-label feature dimensionalities used to size the model I/O.
        labels_to_dims = {}
        labels_to_dims['contacts'] = 3+6+ngeoms
        labels_to_dims['positions'] = 3
        # labels_to_dims['object_position'] = 7
        labels_to_dims['object_position'] = 3
        labels_to_dims['joint_position'] = 24
        labels_to_dims['object_vel'] = 6
        labels_to_dims['joint_vel'] = 24
        labels_to_dims['geoms'] = ngeoms



        dims = (labels_to_dims[input_label], labels_to_dims[output_label])
        print("preparation done")



        num_episodes = 1
        horizon = 100
        # NOTE(review): `feature_net` is not defined anywhere in this function
        # (its construction appears to have been removed/commented out), so
        # these calls raise NameError when a visualize flag is set — confirm
        # against the original script.
        if visualize_training_data:
            visualize_data(vis_data, env, fixed_num_of_contact, feature_net, mode, input_label)
        if visualize_testing_data:
            visualize_data(vis_data_test, env, fixed_num_of_contact, feature_net, mode, input_label)
示例#5
0
def main(**kwargs):
    """Roll out a saved policy to generate contact paths and pickle them.

    Converts kwargs back into a CLI-style argument list, parses them, derives
    a human-readable 'label' from the observation type, configures logging,
    builds the env and loads the pickled policy, generates paths, processes
    them, and dumps the result to a dataset pickle.
    """
    # configure logger, disable logging in child MPI processes (with rank > 0)
    # Rebuild an argv-style list from kwargs so the shared arg parser can be reused.
    arg_list = []
    for key in kwargs.keys():
        arg_list.append('--' + key)
        arg_list.append(str(kwargs[key]))
    arg_parser = common_arg_parser()
    buffer_size = int(kwargs['buffer_size'])
    args, unknown_args = arg_parser.parse_known_args(arg_list)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Deep-copy so mutating params (adding 'label') doesn't alter args.__dict__.
    params = args.__dict__
    import copy
    params = copy.deepcopy(params)

    # Map the observation type to a descriptive experiment label.
    if args.obs_type == 'object':
        params['label'] = args.obs_type
    elif args.obs_type == 'original':
        params['label'] = 'object+joint'
    elif args.obs_type == 'contact':
        params['label'] = 'object+contact(' + args.process_type + ')'
    elif args.obs_type == 'full_contact':
        params['label'] = 'object+joint+contact(' + args.process_type + ')'

    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(params,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)

    # NOTE(review): this value is unused; `folder` is reassigned below.
    folder = './data/policy/' + str(args.env)

    obs_type = params['obs_type']
    fixed_num_of_contact = params['fixed_num_of_contact']

    env = gym.make(params['env'],
                   obs_type=obs_type,
                   fixed_num_of_contact=fixed_num_of_contact)

    # str(args.env)[4:] strips a 4-char prefix from the env id to locate the
    # policy directory — presumably dropping a namespace prefix; verify.
    policy = pickle.load(
        open('./data/policy/' + str(args.env)[4:] + '/policy.pickle', 'rb'))
    T = env._max_episode_steps

    paths = generate_paths(policy,
                           T,
                           obs_type,
                           params['env'],
                           fixed_num_of_contact,
                           build_env(args),
                           contact_dim=env.contact_dim,
                           buffer_size=buffer_size)

    paths = process_episode(paths.all_samples(), env.contact_dim,
                            fixed_num_of_contact)

    folder = '../saved/trained/' + str(args.env) + str(fixed_num_of_contact)
    with open(folder + '-18-dict.pickle', 'wb') as pickle_file:
        pickle.dump([paths, fixed_num_of_contact], pickle_file)
示例#6
0
def main(**kwargs):
    """Train a CPC encoder/transition pair, then a decoder, on contact data.

    Loads a pickled (obs, acts, fixed_num_of_contact) dataset, expands
    observations via the env's geom count, builds CUDA models, runs `epochs`
    rounds of CPC training/testing followed by 100 rounds of decoder
    training/testing, logging each epoch.

    Note that kwargs values are indexed with ``[0]`` throughout, so each is
    expected to arrive as a one-element sequence.
    """
    exp_dir = os.getcwd(
    ) + '/cpc_model/' + kwargs['process_type'][0] + '/n200-8'
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)

    obs, acts, fixed_num_of_contact = pickle.load(
        open('../untrained/HandManipulateEgg-v0/5seeds-dict.pickle', 'rb'))

    include_action = kwargs['include_action'][0]

    env = gym.make(kwargs['env'][0],
                   obs_type=kwargs['obs_type'][0],
                   fixed_num_of_contact=[fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    # Shift by one step to form (obs_t, obs_{t+1}) pairs for CPC.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape

    obs_dim = (fixed_num_of_contact, contact_point_dim)

    # Hyperparameters: latent size, learning rate, epochs, batch size,
    # negative-sample count n, prediction horizon k.
    z_dim = 8
    lr = 1e-3
    epochs = 100
    batch_size = 2
    n = 200
    k = 1

    encoder = Encoder(z_dim, obs_dim[1], fixed_num_of_contact).cuda()
    # Transition conditions on the action only when include_action is set.
    if include_action:
        trans = Transition(z_dim, action_dim).cuda()
    else:
        trans = Transition(z_dim, 0).cuda()
    decoder = Decoder(z_dim, 3).cuda()

    # Encoder and transition share one optimizer; decoder gets its own.
    optim_cpc = optim.Adam(list(encoder.parameters()) +
                           list(trans.parameters()),
                           lr=lr)
    optim_dec = optim.Adam(decoder.parameters(), lr=lr)
    train_data, test_data = split_data([obs, acts, next_obs])

    for epoch in range(epochs):
        train_cpc(encoder, trans, optim_cpc, epoch, train_data, batch_size, n,
                  k, include_action)
        test_cpc(encoder, trans, epoch, test_data, batch_size, n, k,
                 include_action)

        logger.logkv("epoch", epoch)
        logger.dumpkvs()

    # Re-split including object_info, which the decoder predicts from latents.
    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    for epoch in range(100):
        train_decoder(decoder,
                      encoder,
                      optim_dec,
                      epoch,
                      train_data,
                      batch_size,
                      include_action,
                      n,
                      k=1)
        test_decoder(decoder,
                     encoder,
                     epoch,
                     test_data,
                     batch_size,
                     include_action,
                     n,
                     k=1)
        logger.logkv("epoch", epoch)
        logger.dumpkvs()
示例#7
0
def main(**kwargs):
    """Build and train/restore a CPC model (+ decoder) on contact data in TF.

    Modes:
      - 'train': initialize variables, run CPC epochs then decoder epochs,
        saving the CPC model in between.
      - 'restore': reload a saved checkpoint and train only the decoder.
      - 'store_weights': reload a checkpoint and pickle the trainable weights
        and model hyperparameters.
    """
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']

    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']

    # Map known data paths to short dataset names (recorded in params.json).
    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'
    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    # Model checkpoints go under saved_cpc, keyed by label/normalization/process type.
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' +  str(kwargs['normalize_data']) + '/' + str(process_type)+ '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type)+ '/trained'
    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    obs, acts, fixed_num_of_contact = pickle.load(open(dataset, 'rb'))

    env = gym.make(kwargs['env'],
                   obs_type = kwargs['obs_type'],
                   fixed_num_of_contact = [fixed_num_of_contact, True])

    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)
    # Shift by one step to form (obs_t, obs_{t+1}) pairs for CPC.
    next_obs = obs[:, 1:]
    obs = obs[:, :-1]
    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape

    obs_dim = (fixed_num_of_contact, contact_point_dim)
    train_data, test_data = split_data([obs, acts, next_obs, object_info])

    batch_size = 2

    # For restore/store_weights, reload the graph and the latest checkpoint.
    if mode in ['restore', 'store_weights']:
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        # Strip the trailing '/trained' (or '...debug') suffix to get the
        # checkpoint directory — presumably always 8 chars; verify.
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        encoder = Encoder(z_dim,
                          fixed_num_of_contact,
                          contact_point_dim,
                          feature_dims)
        trans = Transition(z_dim, action_dim, mode = trans_mode)
        cpc = CPC(sess,
                  encoder,
                  trans,
                  encoder_lr,
                  fixed_num_of_contact,
                  contact_point_dim,
                  action_dim,
                  include_action = include_action,
                  type = 1*(label=='cpc1') + 2*(label=='cpc2'),
                  n_neg = n,
                  process_type = process_type,
                  mode = mode)

        cpc_epochs, decoder_epochs = epochs
        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            for epoch in range(cpc_epochs):
                # NOTE(review): the training call is commented out, so this
                # loop only evaluates — confirm whether that is intentional.
                # train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)

            """decoder"""
            logger.log("Done with cpc training.")

            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            # Initialize only variables not covered by the CPC initialization.
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)

        elif mode == 'restore':
            decoder = Decoder(cpc,
                              sess,
                              z_dim,
                              decoder_feature_dims,
                              fixed_num_of_contact,
                              contact_point_dim,
                              decoder_lr)
            # Initialize only variables not restored from the checkpoint.
            uninit_vars = [var for var in tf.global_variables() if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
                print("logging to", exp_dir)

        elif mode == 'store_weights':
            # Snapshot all trainable variables and pickle them, plus the
            # hyperparameters needed to rebuild the model elsewhere.
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' +  str(label) + '/' + str(process_type)+ '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)

            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + 'params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim, encoder_lr, feature_dims, trans_mode, label, include_action], pickle_file)

        tf.reset_default_graph()
        print("graph reset successfully")
示例#8
0
def run_experiment(**kwargs):
    """Wire up and run a SAC training experiment.

    Configures logging, opens a TF session, builds the env from parsed args,
    constructs twin Q-functions with matching targets, a squashed Gaussian
    policy, a sampler, a sample processor, the SAC algorithm, and a Trainer,
    then runs training.
    """
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last')
    # NOTE(review): bare open() leaks the file handle; a `with` block would be safer.
    json.dump(kwargs,
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True,
              cls=ClassEncoder)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get(
        'gpu_frac', 0.95)
    sess = tf.Session(config=config)
    with sess.as_default() as sess:

        # Instantiate classes
        set_seed(kwargs['seed'])

        baseline = kwargs['baseline']()

        #        env = normalize(kwargs['env']())
        # Rebuild an argv-style list from kwargs so the shared arg parser can be reused.
        arg_list = []
        for key in kwargs.keys():
            arg_list.append('--' + key)
            arg_list.append(str(kwargs[key]))
        arg_parser = common_arg_parser()
        args, unknown_args = arg_parser.parse_known_args(arg_list)
        env = build_env(args)

        # Twin Q-functions (and their targets) as used by SAC.
        Qs = [
            ValueFunction(
                name="q_fun_%d" % i,
                obs_dim=int(np.prod(env.observation_space.shape)),
                action_dim=int(np.prod(env.action_space.shape)),
                hidden_nonlinearity=kwargs['vfun_hidden_nonlineariy'],
            ) for i in range(2)
        ]

        Q_targets = [
            ValueFunction(
                name="q_fun_target_%d" % i,
                obs_dim=int(np.prod(env.observation_space.shape)),
                action_dim=int(np.prod(env.action_space.shape)),
                hidden_nonlinearity=kwargs['vfun_hidden_nonlineariy'],
            ) for i in range(2)
        ]

        policy = GaussianMLPPolicy(
            name="policy",
            obs_dim=np.prod(env.observation_space.shape),
            action_dim=np.prod(env.action_space.shape),
            hidden_sizes=kwargs['policy_hidden_sizes'],
            learn_std=kwargs['policy_learn_std'],
            output_nonlinearity=kwargs['policy_output_nonlinearity'],
            hidden_nonlinearity=kwargs['policy_hidden_nonlinearity'],
            squashed=True)

        sampler = BaseSampler(
            env=env,
            policy=policy,
            num_rollouts=kwargs['num_rollouts'],
            max_path_length=kwargs['max_path_length'],
        )

        sample_processor = ModelSampleProcessor(
            baseline=baseline,
            discount=kwargs['discount'],
        )

        algo = SAC(policy=policy,
                   discount=kwargs['discount'],
                   learning_rate=kwargs['learning_rate'],
                   env=env,
                   Qs=Qs,
                   Q_targets=Q_targets,
                   reward_scale=kwargs['reward_scale'],
                   batch_size=kwargs['batch_size'])

        trainer = Trainer(
            algo=algo,
            policy=policy,
            env=env,
            sampler=sampler,
            sample_processor=sample_processor,
            n_itr=kwargs['n_itr'],
            sess=sess,
        )

        trainer.train()
    sess.__exit__()
示例#9
0
def configure_logger(log_path, **kwargs):
    """Configure the global logger.

    Uses ``log_path`` when one is given; otherwise forwards any keyword
    options straight to ``logger.configure``.
    """
    if log_path is None:
        logger.configure(**kwargs)
        return
    logger.configure(log_path)