def argsparser():
    """Build and parse command-line arguments for the GAIL (DeepMimic) experiment.

    Returns:
        argparse.Namespace: the parsed command-line arguments (reads sys.argv).
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id', help='environment ID', default='DeepMimic')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint_tmp')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None)
    # Task
    parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train')
    # for evaluation
    boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False, help='save the trajectories or not')
    # Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    # Optimization Configuration
    parser.add_argument('--g_step', help='number of steps to train policy in each epoch', type=int, default=3)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    # Algorithms Configuration
    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff', help='entropy coefficiency of policy', type=float, default=0)
    # Training Configuration
    parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100)
    # BUG FIX: default was the float 1e6 — argparse does not run `type` on
    # non-string defaults, so args.num_timesteps would silently be a float.
    parser.add_argument('--num_timesteps', help='number of timesteps per episode', type=int, default=int(1e6))
    parser.add_argument('--pretrained_weight_path', help='path of pretrained weights', type=str, default=None)
    return parser.parse_args()
def bc_argparser(description="Behavioral Cloning Experiment"):
    """Create an argparse.ArgumentParser for behavioral cloning-related tasks.

    Args:
        description (str): description shown in the parser's help text.

    Returns:
        argparse.ArgumentParser: the configured (unparsed) parser.
    """
    # BUG FIX: the original did `parser = argparse(description)`, calling the
    # `argparse` *module* itself, which raises TypeError at runtime. The
    # intended call is argparse.ArgumentParser.
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--note', help='w/e', type=str, default=None)
    parser.add_argument('--env_id', help='environment identifier', default='Hopper-v2')
    parser.add_argument('--horizon', help='maximum number of timesteps in an episode', type=int, default=None)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--checkpoint_dir', help='directory to save the models', default=None)
    parser.add_argument('--log_dir', help='directory to save the log files', default='data/logs')
    parser.add_argument('--summary_dir', help='directory to save the summaries', default='data/summaries')
    parser.add_argument('--task', help='task to carry out', type=str, choices=['clone', 'evaluate_bc_policy'], default='clone')
    parser.add_argument('--expert_path', help='.npz archive containing the demos', type=str, default=None)
    parser.add_argument('--num_demos', help='number of expert demo trajs for imitation', type=int, default=None)
    parser.add_argument('--save_frequency', help='save model every xx iterations', type=int, default=100)
    parser.add_argument('--num_iters', help='cummulative number of iterations since launch', type=int, default=int(1e6))
    parser.add_argument('--batch_size', help='minibatch size', type=int, default=64)
    parser.add_argument('--lr', help='adam learning rate', type=float, default=3e-4)
    parser.add_argument('--clip_norm', type=float, default=None)
    boolean_flag(parser, 'render', help='whether to render the interaction traces', default=False)
    parser.add_argument('--num_trajs', help='number of trajectories to evaluate/gather', type=int, default=10)
    parser.add_argument('--exact_model_path', help='exact path of the model', type=str, default=None)
    parser.add_argument('--model_ckpt_dir', help='checkpoint directory containing the models', type=str, default=None)
    parser.add_argument('--demos_dir', type=str, help='directory to save the demonstrations', default='data/expert_demonstrations')
    boolean_flag(parser, 'rmsify_obs', default=True)
    # Policy network architecture
    parser.add_argument('--hid_widths', nargs='+', type=int, default=[64, 64])
    parser.add_argument('--hid_nonlin', type=str, default='leaky_relu',
                        choices=['relu', 'leaky_relu', 'prelu', 'elu', 'selu', 'tanh'])
    parser.add_argument('--hid_w_init', type=str, default='he_normal',
                        choices=['he_normal', 'he_uniform', 'xavier_normal', 'xavier_uniform'])
    return parser
def argsparser():
    """Build and parse command-line arguments for the GAIL/AGAIL (CartPole) experiment.

    Returns:
        argparse.Namespace: the parsed command-line arguments (reads sys.argv).
    """
    parser = argparse.ArgumentParser("Tensorflow Implementation of GAIL")
    parser.add_argument('--env_id', help='environment ID', default='CartPole-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--expert_path', type=str, default='expert_data/cartpole')
    parser.add_argument('--checkpoint_dir', help='the directory to save model', default='checkpoint')
    parser.add_argument('--log_dir', help='the directory to save log file', default='log')
    parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None)
    # Task
    parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train')
    # for evaluation
    boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate')
    boolean_flag(parser, 'save_sample', default=False, help='save the trajectories or not')
    # Mujoco Dataset Configuration
    parser.add_argument('--traj_limitation', type=int, default=-1)
    parser.add_argument('--loss_percent', type=float, default=0.0)
    # Optimization Configuration
    parser.add_argument('--g_step', help='number of steps to train policy in each epoch', type=int, default=1)
    parser.add_argument('--d_step', help='number of steps to train discriminator in each epoch', type=int, default=1)
    # Network Configuration (Using MLP Policy)
    parser.add_argument('--policy_hidden_size', type=int, default=100)
    parser.add_argument('--adversary_hidden_size', type=int, default=100)
    # Algorithms Configuration
    parser.add_argument('--algo', type=str, choices=['trpo', 'state', 'agail'], default='trpo')
    parser.add_argument('--max_kl', type=float, default=0.01)
    parser.add_argument('--policy_entcoeff', help='entropy coefficiency of policy', type=float, default=0)
    parser.add_argument('--adversary_entcoeff', help='entropy coefficiency of discriminator', type=float, default=1e-3)
    # Training Configuration
    parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100)
    # BUG FIX: default was the float 1e6 — argparse does not run `type` on
    # non-string defaults, so args.num_timesteps would silently be a float.
    parser.add_argument('--num_timesteps', help='number of timesteps per episode', type=int, default=int(1e6))
    # Behavior Cloning
    boolean_flag(parser, 'pretrained', default=False, help='Use BC to pretrain')
    parser.add_argument('--BC_max_iter', help='Max iteration for training BC', type=int, default=None)
    return parser.parse_args()