# Example #1
def argsparser():
    """Build and parse command-line arguments for the GAIL (DeepMimic) run.

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id', help='environment ID', default='DeepMimic')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint_tmp')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None)
    # Task selection
    parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train')
    # For evaluation
    boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False, help='save the trajectories or not')
    # Mujoco dataset configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Optimization configuration
    parser.add_argument('--g_step', help='number of steps to train policy in each epoch', type=int, default=3)
    # Network configuration (using MLP policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    # Algorithm configuration
    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff', help='entropy coefficient of policy', type=float, default=0)
    # Training configuration
    parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100)
    # NOTE: argparse does NOT run `type` on defaults, so the default must be
    # an int explicitly; a bare 1e6 would leave num_timesteps as a float
    # whenever the flag is omitted.
    parser.add_argument('--num_timesteps', help='number of timesteps per episode', type=int, default=int(1e6))
    parser.add_argument('--pretrained_weight_path', help='path of pretrained weights', type=str, default=None)
    return parser.parse_args()
def bc_argparser(description="Behavioral Cloning Experiment"):
    """Create an argparse.ArgumentParser for behavioral cloning-related tasks.

    Args:
        description: text shown at the top of the parser's --help output.

    Returns:
        argparse.ArgumentParser: the configured (but not yet parsed) parser.
    """
    # BUG FIX: the original called the `argparse` module itself
    # (`argparse(description)`), which raises TypeError at runtime.
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--note', help='w/e', type=str, default=None)
    parser.add_argument('--env_id', help='environment identifier', default='Hopper-v2')
    parser.add_argument('--horizon', help='maximum number of timesteps in an episode',
                        type=int, default=None)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--checkpoint_dir', help='directory to save the models',
                        default=None)
    parser.add_argument('--log_dir', help='directory to save the log files',
                        default='data/logs')
    parser.add_argument('--summary_dir', help='directory to save the summaries',
                        default='data/summaries')
    parser.add_argument('--task', help='task to carry out', type=str,
                        choices=['clone',
                                 'evaluate_bc_policy'],
                        default='clone')
    # Expert demonstration data
    parser.add_argument('--expert_path', help='.npz archive containing the demos',
                        type=str, default=None)
    parser.add_argument('--num_demos', help='number of expert demo trajs for imitation',
                        type=int, default=None)
    # Training schedule
    parser.add_argument('--save_frequency', help='save model every xx iterations',
                        type=int, default=100)
    parser.add_argument('--num_iters', help='cummulative number of iterations since launch',
                        type=int, default=int(1e6))
    parser.add_argument('--batch_size', help='minibatch size', type=int, default=64)
    parser.add_argument('--lr', help='adam learning rate', type=float, default=3e-4)
    parser.add_argument('--clip_norm', type=float, default=None)
    boolean_flag(parser, 'render', help='whether to render the interaction traces', default=False)
    # Evaluation / sampling
    parser.add_argument('--num_trajs', help='number of trajectories to evaluate/gather',
                        type=int, default=10)
    parser.add_argument('--exact_model_path', help='exact path of the model',
                        type=str, default=None)
    parser.add_argument('--model_ckpt_dir', help='checkpoint directory containing the models',
                        type=str, default=None)
    parser.add_argument('--demos_dir', type=str, help='directory to save the demonstrations',
                        default='data/expert_demonstrations')
    # Network / preprocessing configuration
    boolean_flag(parser, 'rmsify_obs', default=True)
    parser.add_argument('--hid_widths', nargs='+', type=int, default=[64, 64])
    parser.add_argument('--hid_nonlin', type=str, default='leaky_relu',
                        choices=['relu', 'leaky_relu', 'prelu', 'elu', 'selu', 'tanh'])
    parser.add_argument('--hid_w_init', type=str, default='he_normal',
                        choices=['he_normal', 'he_uniform', 'xavier_normal', 'xavier_uniform'])
    return parser
# Example #3
def argsparser():
    """Build and parse command-line arguments for the GAIL (CartPole) run.

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id',
                        help='environment ID',
                        default='CartPole-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path',
                        type=str,
                        default='expert_data/cartpole')
    parser.add_argument('--checkpoint_dir',
                        help='the directory to save model',
                        default='checkpoint')
    parser.add_argument('--log_dir',
                        help='the directory to save log file',
                        default='log')
    parser.add_argument('--load_model_path',
                        help='if provided, load the model',
                        type=str,
                        default=None)
    # Task selection
    parser.add_argument('--task',
                        type=str,
                        choices=['train', 'evaluate', 'sample'],
                        default='train')
    # For evaluation
    boolean_flag(parser,
                 'stochastic_policy',
                 default=False,
                 help='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser,
                 'save_sample',
                 default=False,
                 help='save the trajectories or not')
    # Mujoco dataset configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    parser.add_argument('--loss_percent', type=float, default=0.0)
    # Optimization configuration
    parser.add_argument('--g_step',
                        help='number of steps to train policy in each epoch',
                        type=int,
                        default=1)
    parser.add_argument(
        '--d_step',
        help='number of steps to train discriminator in each epoch',
        type=int,
        default=1)
    # Network configuration (using MLP policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--adversary_hidden_size', type=int, default=100)
    # Algorithm configuration
    parser.add_argument('--algo',
                        type=str,
                        choices=['trpo', 'state', 'agail'],
                        default='trpo')
    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff',
                        help='entropy coefficient of policy',
                        type=float,
                        default=0)
    parser.add_argument('--adversary_entcoeff',
                        help='entropy coefficient of discriminator',
                        type=float,
                        default=1e-3)
    # Training configuration
    parser.add_argument('--save_per_iter',
                        help='save model every xx iterations',
                        type=int,
                        default=100)
    # NOTE: argparse does NOT run `type` on defaults, so the default must be
    # an int explicitly; a bare 1e6 would leave num_timesteps as a float
    # whenever the flag is omitted.
    parser.add_argument('--num_timesteps',
                        help='number of timesteps per episode',
                        type=int,
                        default=int(1e6))
    # Behavior cloning pretraining
    boolean_flag(parser,
                 'pretrained',
                 default=False,
                 help='Use BC to pretrain')
    parser.add_argument('--BC_max_iter',
                        help='Max iteration for training BC',
                        type=int,
                        default=None)
    return parser.parse_args()