Example #1
                        moving_ave.append(test_info[1])
                        if (len(moving_ave) > smooth_alg):
                            moving_ave.pop(0)
                        test_info[1] = np.mean(moving_ave)
                        info.append(test_info)
                    record_type = record[:-4]
                    if not (record_type in data.keys()):
                        data[record_type] = {}
                    if not (alg_type in data[record_type].keys()):
                        data[record_type][alg_type] = []
                    data[record_type][alg_type].append(info)
    return data, data_path


if __name__ == '__main__':
    parser = get_arg_parser()
    parser.add_argument('--smooth',
                        help='number of checkpoints for smoothing',
                        type=np.int32,
                        default=10)
    parser.add_argument('--color',
                        help='color theme id',
                        type=np.int32,
                        default=0)
    args = parser.parse_args()

    plot_colors_alg = plot_colors_alg[args.color]

    matplotlib.use('Agg')
    plt.figure(figsize=(17.5, 4.25), frameon=False)
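
The snippet in Example #1 is truncated: it starts inside the loop of a plotting script that smooths per-checkpoint test values with a sliding window before grouping them by record and algorithm type. Below is a minimal self-contained sketch of that smoothing step; the function name and the (checkpoint, value) row layout are assumptions for illustration, not the original script's code.

import numpy as np

def smooth_rows(rows, window=10):
    # rows: list of [checkpoint, value] pairs; returns a copy whose values are
    # replaced by the mean over a sliding window of the last `window` entries.
    moving_ave, smoothed = [], []
    for checkpoint, value in rows:
        moving_ave.append(value)
        if len(moving_ave) > window:
            moving_ave.pop(0)
        smoothed.append([checkpoint, np.mean(moving_ave)])
    return smoothed

# smooth_rows([[0, 1.0], [1, 3.0], [2, 5.0]], window=2)
# -> [[0, 1.0], [1, 2.0], [2, 4.0]]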
Example #2
def get_args(do_just_test=False):  #this parameter is just used for the name
    parser = get_arg_parser()

    parser.add_argument('--tag',
                        help='terminal tag in logger',
                        type=str,
                        default='')
    parser.add_argument('--alg',
                        help='backend algorithm',
                        type=str,
                        default='ddpg',
                        choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn',
                        help='type of training method',
                        type=str,
                        default='hgg',
                        choices=learner_collection.keys())

    parser.add_argument('--env',
                        help='gym env id',
                        type=str,
                        default='FetchReach-v1',
                        choices=Robotics_envs_id)
    parser.add_argument('--extra_sec',
                        help='whether to use extra distance around obstacle',
                        type=str2bool,
                        default=False)
    args, _ = parser.parse_known_args()
    if args.env == 'HandReach-v0':
        parser.add_argument('--goal',
                            help='method of goal generation',
                            type=str,
                            default='reach',
                            choices=['vanilla', 'reach'])
    else:
        parser.add_argument(
            '--goal',
            help='method of goal generation',
            type=str,
            default='interval',
            choices=[
                'vanilla', 'fixobj', 'interval', 'custom', 'intervalTest',
                'intervalExt', 'intervalColl', 'intervalRewSub',
                'intervalRewVec', 'intervalTestExtendedBbox',
                'intervalCollStop', 'intervalRewMod', 'intervalCollStopRegion',
                'intervalRewModStop', 'intervalRewModRegion',
                'intervalRewModRegionStop', 'intervalCollMinDist',
                'intervalMinDistRewMod', 'intervalMinDistRewModStop',
                'intervalTestExtendedMinDist', 'intervalCollPAV', 'intervalP',
                'intervalPRewMod', 'intervalPRewModStop',
                'intervalTestExtendedP', 'intervalPAV', 'intervalPAVRewMod',
                'intervalPAVRewModStop', 'intervalTestExtendedPAV',
                'intervalPRel', 'intervalPRelRewMod', 'intervalPRelRewModStop',
                'intervalTestExtendedPRel', 'intervalPAVRel',
                'intervalPAVRelRewMod', 'intervalPAVRelRewModStop',
                'intervalTestExtendedPAVRel'
            ])

        if args.env[:5] == 'Fetch':
            parser.add_argument('--init_offset',
                                help='initial offset in fetch environments',
                                type=np.float32,
                                default=1.0)
        elif args.env[:4] == 'Hand':
            parser.add_argument('--init_rotation',
                                help='initial rotation in hand environments',
                                type=np.float32,
                                default=0.25)

    args, _ = parser.parse_known_args()
    if 'RewMod' in args.goal:
        parser.add_argument('--rew_mod_val',
                            help='value to subtract on collision',
                            type=np.float32,
                            default=-5.)
    if args.extra_sec:
        parser.add_argument('--sec_dist',
                            help='security distance around obstacle',
                            type=np.float32,
                            default=None)
    parser.add_argument('--graph',
                        help='g-hgg yes or no',
                        type=str2bool,
                        default=False)
    parser.add_argument('--show_goals',
                        help='number of goals to show',
                        type=np.int32,
                        default=0)
    parser.add_argument('--play_path',
                        help='path to meta_file directory for play',
                        type=str,
                        default=None)
    parser.add_argument(
        '--play_path_im_h',
        help=
        'path to meta_file directory for play; only used for the heatmap comparison',
        type=str,
        default=None)
    parser.add_argument('--play_epoch',
                        help='epoch to play',
                        type=str,
                        default='latest')
    parser.add_argument(
        '--stop_hgg_threshold',
        help=
        'threshold of goals inside goalspace, between 0 and 1, deactivated by default value 2!',
        type=np.float32,
        default=2)
    parser.add_argument('--agent_device',
                        help='the device to load the agent',
                        type=str,
                        default='cpu')

    parser.add_argument('--n_x',
                        help='number of vertices in x-direction for g-hgg',
                        type=int,
                        default=31)
    parser.add_argument('--n_y',
                        help='number of vertices in y-direction for g-hgg',
                        type=int,
                        default=31)
    parser.add_argument('--n_z',
                        help='number of vertices in z-direction for g-hgg',
                        type=int,
                        default=11)

    parser.add_argument('--gamma',
                        help='discount factor',
                        type=np.float32,
                        default=0.98)
    parser.add_argument('--clip_return',
                        help='whether to clip return value',
                        type=str2bool,
                        default=True)
    # these two arguments might be helpful when using a reward other than the sparse (-1, 0) reward
    parser.add_argument('--reward_min',
                        help='minimum reward value',
                        type=np.float32,
                        default=-1.)
    parser.add_argument('--reward_max',
                        help='maximum reward value',
                        type=np.float32,
                        default=0.)
    parser.add_argument('--eps_act',
                        help='percentage of epsilon greedy exploration',
                        type=np.float32,
                        default=0.3)
    parser.add_argument(
        '--std_act',
        help='standard deviation of uncorrelated gaussian explorarion',
        type=np.float32,
        default=0.2)

    parser.add_argument('--pi_lr',
                        help='learning rate of policy network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--q_lr',
                        help='learning rate of value network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--act_l2',
                        help='quadratic penalty on actions',
                        type=np.float32,
                        default=1.0)
    parser.add_argument(
        '--polyak',
        help='interpolation factor in polyak averaging for DDPG',
        type=np.float32,
        default=0.95)

    parser.add_argument('--epoches',
                        help='number of epochs',
                        type=np.int32,
                        default=20)
    parser.add_argument('--cycles',
                        help='number of cycles per epoch',
                        type=np.int32,
                        default=20)
    parser.add_argument('--episodes',
                        help='number of episodes per cycle',
                        type=np.int32,
                        default=50)
    parser.add_argument('--timesteps',
                        help='number of timesteps per episode',
                        type=np.int32,
                        default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches',
                        help='number of batches to train per episode',
                        type=np.int32,
                        default=20)

    parser.add_argument('--buffer_size',
                        help='number of episodes in replay buffer',
                        type=np.int32,
                        default=10000)
    parser.add_argument(
        '--buffer_type',
        help=
        'type of replay buffer / whether to use Energy-Based Prioritization',
        type=str,
        default='energy',
        choices=['normal', 'energy'])
    parser.add_argument('--batch_size',
                        help='size of sample batch',
                        type=np.int32,
                        default=256)
    parser.add_argument('--warmup',
                        help='number of timesteps for buffer warmup',
                        type=np.int32,
                        default=10000)
    parser.add_argument('--her',
                        help='type of hindsight experience replay',
                        type=str,
                        default='future',
                        choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio',
                        help='ratio of hindsight experience replay',
                        type=np.float32,
                        default=0.8)
    parser.add_argument('--pool_rule',
                        help='rule of collecting achieved states',
                        type=str,
                        default='full',
                        choices=['full', 'final'])

    parser.add_argument('--hgg_c',
                        help='weight of initial distribution in flow learner',
                        type=np.float32,
                        default=3.0)
    parser.add_argument('--hgg_L',
                        help='Lipschitz constant',
                        type=np.float32,
                        default=5.0)
    parser.add_argument('--hgg_pool_size',
                        help='size of achieved trajectories pool',
                        type=np.int32,
                        default=1000)

    parser.add_argument('--save_acc',
                        help='save success rate',
                        type=str2bool,
                        default=True)

    #arguments for VAEs and images
    parser.add_argument('--vae_dist_help',
                        help='using vaes yes or no',
                        type=str2bool,
                        default=False)
    parser.add_argument('--img_size',
                        help='image size in pixels',
                        type=np.int32,
                        default=84)
    parser.add_argument('--img_vid_size',
                        help='video image size in pixels',
                        type=np.int32,
                        default=500)
    #type of VAE
    parser.add_argument(
        '--vae_type',
        help='',
        type=str,
        default=None,
        choices=['sb', 'mixed', 'monet', 'space', 'bbox', 'faster_rcnn'])
    #type VAE for size
    parser.add_argument(
        '--vae_size_type',
        help='',
        type=str,
        default='all',
        choices=['normal', 'sb', 'mixed',
                 'monet'])  #if mixed or monet then representation is shared

    #parameters for VAE
    parser.add_argument('--latent_size_obstacle',
                        help='size latent space obstacle',
                        type=np.int32,
                        default=None)
    parser.add_argument('--latent_size_goal',
                        help='size latent space goal',
                        type=np.int32,
                        default=None)
    parser.add_argument('--obstacle_ind_1',
                        help='index 1 component latent vector',
                        type=np.int32,
                        default=None)
    parser.add_argument('--obstacle_ind_2',
                        help='index 2 component latent vector',
                        type=np.int32,
                        default=None)
    parser.add_argument('--goal_ind_1',
                        help='index 1 component latent vector',
                        type=np.int32,
                        default=None)
    parser.add_argument('--goal_ind_2',
                        help='index 2 component latent vector',
                        type=np.int32,
                        default=None)
    parser.add_argument('--goal_slot', help='', type=np.int32, default=None)
    parser.add_argument('--obstacle_slot',
                        help='',
                        type=np.int32,
                        default=None)

    #parameter for size VAE
    parser.add_argument('--size_ind',
                        help='index 2 component latent vector',
                        type=np.int32,
                        default=None)
    parser.add_argument('--size_ind_2',
                        help='index 2 component latent vector',
                        type=np.int32,
                        default=None)

    parser.add_argument(
        '--dist_estimator_type',
        help='the type of dist estimator to use, or None if not using one',
        type=str,
        default=None,
        choices=[
            'noneType', 'noneTypeReal', 'normal', 'realCoords', 'multiple',
            'multipleReal', 'subst', 'substReal'
        ])

    #for imaginary obstacle interactions
    parser.add_argument('--imaginary_obstacle_transitions',
                        help='expand obstacle transition',
                        type=str2bool,
                        default=False)
    args, _ = parser.parse_known_args()
    if args.imaginary_obstacle_transitions:
        parser.add_argument(
            '--im_train_freq',
            help='how often the imaginary transitions are used',
            type=int,
            default=5)
        parser.add_argument('--im_buffer_size',
                            help='size of the imaginary buffer',
                            type=int,
                            default=400)
        parser.add_argument(
            '--im_warmup',
            help='minimum amount of transitions to start sampling',
            type=int,
            default=120)
        parser.add_argument(
            '--im_n_per_type',
            help='number of fake interactions per type of interaction',
            type=int,
            default=5)

    args = parser.parse_args()
    args.num_vertices = [args.n_x, args.n_y, args.n_z]
    args.goal_based = (args.env in Robotics_envs_id)
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    if args.extra_sec and args.sec_dist is None:
        if args.vae_dist_help:  #distance for latent space
            args.sec_dist = 0.02
        else:
            args.sec_dist = 0.009

    if args.imaginary_obstacle_transitions:
        args.im_train_counter = 0
        args.im_norm_freq = copy.copy(args.im_train_freq)
        args.im_norm_counter = 0

    if do_just_test:
        args.epoches = 1
        args.cycles = 5

    base_name = args.alg + '-' + args.env + '-' + args.goal + '-' + args.learn
    if do_just_test:
        if args.play_path is not None:
            remaining, last = os.path.split(args.play_path)
            if args.env in last:
                logger_name = 'TEST-' + last
            else:
                remaining, last = os.path.split(remaining)
                if args.env in last:
                    logger_name = 'TEST-' + last
                else:
                    logger_name = 'TEST-' + base_name
            if 'secdist' in args.play_path and not args.extra_sec:
                raise Exception(
                    'agent was trained with a security distance, but the test is not using one; add the same distance'
                )

        else:
            logger_name = 'TEST-' + base_name

        if args.play_path is not None and remaining.startswith('log/'):
            rest_path = remaining[4:]
            if len(rest_path) > 0:
                logger_name = rest_path + '/' + logger_name
    else:
        logger_name = base_name
        if args.tag != '': logger_name = args.tag + '-' + logger_name
        if args.graph:
            logger_name = logger_name + '-graph'
        if args.stop_hgg_threshold < 1:
            logger_name = logger_name + '-stop'
        if args.dist_estimator_type is not None:
            logger_name = logger_name + '-' + args.dist_estimator_type
        if args.vae_type is not None:
            logger_name = logger_name + '-' + args.vae_type
        if 'RewMod' in args.goal:
            logger_name = logger_name + '-rewmodVal(' + str(
                args.rew_mod_val) + ')'
        if args.extra_sec:
            logger_name = logger_name + '-secdist({})'.format(args.sec_dist)
        if args.imaginary_obstacle_transitions:
            logger_name = logger_name + '-IMAGINARY'
    args.logger = get_logger(logger_name)

    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    cuda = torch.cuda.is_available()
    torch.manual_seed(1)
    device = torch.device("cuda" if cuda else "cpu")
    args.device = device

    #extensions from intervale_ext

    if args.goal in ['intervalRewVec']:
        args.reward_dims = 2
    else:
        args.reward_dims = 1

    args.colls_test_check_envs = [
        'intervalTestExtendedBbox', 'intervalTestExtendedMinDist',
        'intervalTestExtendedP', 'intervalTestExtendedPAV',
        'intervalTestExtendedPRel', 'intervalTestExtendedPAVRel',
        'intervalTest'
    ]

    return args
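
Example #2 (and the examples below) builds its argument set in stages: parse_known_args() is called after a first batch of flags is registered, and the values already parsed (e.g. --env, --goal, --extra_sec, --imaginary_obstacle_transitions) decide which further flags get registered before the final parse_args(). A minimal sketch of this staged-argparse pattern, with hypothetical flags standing in for the real ones:

import argparse

def get_args_staged():
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='FetchReach-v1')
    # First pass: only the flags registered so far are recognized;
    # unknown flags are ignored instead of raising an error.
    args, _ = parser.parse_known_args()
    # Register further flags conditionally on what was already parsed.
    if args.env.startswith('Fetch'):
        parser.add_argument('--timesteps', type=int, default=50)
    else:
        parser.add_argument('--timesteps', type=int, default=100)
    # Final pass: the full flag set is now known, so parsing is strict.
    return parser.parse_args()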
Example #3
def get_args():
    parser = get_arg_parser()

    # basic arguments
    parser.add_argument('--tag',
                        help='terminal tag in logger',
                        type=str,
                        default='')
    parser.add_argument('--gpu', help='which gpu to use', type=int, default=0)
    parser.add_argument('--env', help='gym env id', type=str, default='Pong')
    parser.add_argument('--alg',
                        help='backend algorithm',
                        type=str,
                        default='dqn',
                        choices=algorithm_collection.keys())
    parser.add_argument('--learn',
                        help='type of training method',
                        type=str,
                        default='normal',
                        choices=learner_collection.keys())

    args, _ = parser.parse_known_args()

    # env arguments
    parser.add_argument('--gamma',
                        help='discount factor',
                        type=np.float32,
                        default=0.99)

    def atari_args():
        parser.set_defaults(learn='atari')
        parser.add_argument('--sticky',
                            help='whether to use sticky actions',
                            type=str2bool,
                            default=False)
        parser.add_argument('--xian',
                            help='whether to use xian group',
                            type=str2bool,
                            default=False)
        parser.add_argument(
            '--noop',
            help='number of noop actions while starting new episode',
            type=np.int32,
            default=30)
        parser.add_argument('--frames',
                            help='number of stacked frames',
                            type=np.int32,
                            default=4)
        parser.add_argument('--rews_scale',
                            help='scale of rewards',
                            type=np.float32,
                            default=1.0)
        parser.add_argument('--test_eps',
                            help='random action noise in atari testing',
                            type=np.float32,
                            default=0.001)

    env_args_collection = {'atari': atari_args}
    env_args_collection[envs_collection[args.env]]()

    # training arguments
    parser.add_argument('--epoches',
                        help='number of epochs',
                        type=np.int32,
                        default=20)
    parser.add_argument('--cycles',
                        help='number of cycles per epoch',
                        type=np.int32,
                        default=20)
    parser.add_argument('--iterations',
                        help='number of iterations per cycle',
                        type=np.int32,
                        default=100)
    parser.add_argument('--timesteps',
                        help='number of timesteps per iteration',
                        type=np.int32,
                        default=500)

    # testing arguments
    parser.add_argument('--test_rollouts',
                        help='number of rollouts to test per cycle',
                        type=np.int32,
                        default=5)
    parser.add_argument('--test_timesteps',
                        help='number of timesteps per rollout',
                        type=np.int32,
                        default=27000)
    parser.add_argument('--save_rews',
                        help='save cumulative rewards',
                        type=str2bool,
                        default=False)
    parser.add_argument('--save_Q',
                        help='save Q estimation',
                        type=str2bool,
                        default=False)

    # buffer arguments
    parser.add_argument('--buffer',
                        help='type of replay buffer',
                        type=str,
                        default='default',
                        choices=buffer_collection)
    parser.add_argument('--buffer_size',
                        help='number of transitions in replay buffer',
                        type=np.int32,
                        default=1000000)
    parser.add_argument('--batch_size',
                        help='size of sample batch',
                        type=np.int32,
                        default=32)
    parser.add_argument('--warmup',
                        help='number of timesteps for buffer warmup',
                        type=np.int32,
                        default=2000)

    #### modified !!!!!!!!!!!!!!

    # algorithm arguments
    def q_learning_args():
        parser.add_argument('--train_batches',
                            help='number of batches to train per iteration',
                            type=np.int32,
                            default=25)
        parser.add_argument('--train_target',
                            help='frequency of target network updating',
                            type=np.int32,
                            default=8000)

        parser.add_argument(
            '--eps_l',
            help='beginning percentage of epsilon greedy exploration',
            type=np.float32,
            default=1.00)
        parser.add_argument(
            '--eps_r',
            help='final percentage of epsilon greedy exploration',
            type=np.float32,
            default=0.01)
        parser.add_argument('--eps_decay',
                            help='number of steps to decay epsilon',
                            type=np.int32,
                            default=250000)

        parser.add_argument('--optimizer',
                            help='optimizer to use',
                            type=str,
                            default='adam',
                            choices=['adam', 'rmsprop'])
        args, _ = parser.parse_known_args()
        if args.optimizer == 'adam':
            parser.add_argument('--q_lr',
                                help='learning rate of value network',
                                type=np.float32,
                                default=0.625e-4)
            parser.add_argument('--Adam_eps',
                                help='epsilon factor of Adam optimizer',
                                type=np.float32,
                                default=1.5e-4)
        elif args.optimizer == 'rmsprop':
            parser.add_argument('--q_lr',
                                help='learning rate of value network',
                                type=np.float32,
                                default=2.5e-4)
            parser.add_argument('--RMSProp_decay',
                                help='decay factor of RMSProp optimizer',
                                type=np.float32,
                                default=0.95)
            parser.add_argument('--RMSProp_eps',
                                help='epsilon factor of RMSProp optimizer',
                                type=np.float32,
                                default=1e-2)

        parser.add_argument('--nstep',
                            help='parameter for n-step bootstrapping',
                            type=np.int32,
                            default=1)

    def dqn_args():
        # q_learning_args()
        ddq_args()
        parser.add_argument('--double',
                            help='whether to use double trick',
                            type=str2bool,
                            default=False)
        # parser.add_argument('--dueling', help='whether to use dueling trick', type=str2bool, default=False)

    def cddqn_args():
        q_learning_args()
        parser.add_argument('--dueling',
                            help='whether to use dueling trick',
                            type=str2bool,
                            default=False)

    def mmdqn_args():
        q_learning_args()
        parser.add_argument('--dueling',
                            help='whether to use dueling trick',
                            type=str2bool,
                            default=False)

    def lrdqn_args():
        q_learning_args()
        parser.add_argument('--double',
                            help='whether to use double trick',
                            type=str2bool,
                            default=False)
        parser.add_argument('--rank',
                            help='rank of value matrix',
                            type=np.int32,
                            default=3)
        parser.add_argument('--beta',
                            help='weight of sparsity loss',
                            type=np.float32,
                            default=1.0)

    def ddq_args():
        q_learning_args()
        parser.add_argument(
            '--inner_q_type',
            help=
            'inner Q aggregation: min (TD3 trick), double (double-Q trick), or mean (neither)',
            type=str,
            default='min')
        # parser.add_argument('--td4', help='whether to use td3 trick ', type=str2bool, default=False)
        parser.add_argument('--alpha',
                            help='leaky relu parameter',
                            type=float,
                            default=1.)
        parser.add_argument('--tau',
                            help='parameter for smooth target update',
                            type=float,
                            default=1.)
        parser.add_argument('--num_q',
                            help='number of q to use',
                            type=np.int32,
                            default=4)
        parser.add_argument('--beta',
                            help='if >0 use lambda return, else use max',
                            type=np.float32,
                            default=-1.)
        parser.add_argument('--state_dim',
                            help='for representation, not used currently',
                            type=int,
                            default=32)
        parser.add_argument('--dueling',
                            help='whether to use dueling trick',
                            type=str2bool,
                            default=False)
        parser.add_argument('--max_step',
                            help='max step to truncate',
                            type=int,
                            default=-1)

    algorithm_args_collection = {
        'dqn': dqn_args,
        'cddqn': cddqn_args,
        'mmdqn': mmdqn_args,
        'avedqn': dqn_args,
        'lrdqn': lrdqn_args,
        'ddq': ddq_args,
        'ddq6': ddq_args,
        'amc': ddq_args,
    }
    algorithm_args_collection[args.alg]()

    # learner arguments
    def lb_args():
        parser.add_argument('--lb_type',
                            help='type of lower-bound objective',
                            type=str,
                            default='hard',
                            choices=['hard', 'soft'])

    def hash_args():
        lb_args()
        parser.add_argument('--avg_n',
                            help='number of trajectories for moving average',
                            type=np.int32,
                            default=5)

    learner_args_collection = {
        'atari_lb': lb_args,
        'atari_hash_lb': hash_args,
        'atari_vi_lb': hash_args
    }
    if args.learn in learner_args_collection.keys():
        learner_args_collection[args.learn]()

    args = parser.parse_args()
    get_policy_train_type(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '': logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)

    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    return args
Example #4
def get_args():
    parser = get_arg_parser()

    parser.add_argument('--tag',
                        help='terminal tag in logger',
                        type=str,
                        default='')
    parser.add_argument('--alg',
                        help='backend algorithm',
                        type=str,
                        default='ddpg',
                        choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn',
                        help='type of training method',
                        type=str,
                        default='hgg',
                        choices=learner_collection.keys())

    parser.add_argument('--env',
                        help='gym env id',
                        type=str,
                        default='FetchReach-v1',
                        choices=Robotics_envs_id)
    args, _ = parser.parse_known_args()
    if args.env == 'HandReach-v0':
        parser.add_argument('--goal',
                            help='method of goal generation',
                            type=str,
                            default='reach',
                            choices=['vanilla', 'reach'])
    else:
        parser.add_argument(
            '--goal',
            help='method of goal generation',
            type=str,
            default='interval',
            choices=['vanilla', 'fixobj', 'interval', 'obstacle'])
        if args.env[:5] == 'Fetch':
            parser.add_argument('--init_offset',
                                help='initial offset in fetch environments',
                                type=np.float32,
                                default=1.0)
        elif args.env[:4] == 'Hand':
            parser.add_argument('--init_rotation',
                                help='initial rotation in hand environments',
                                type=np.float32,
                                default=0.25)

    parser.add_argument('--gamma',
                        help='discount factor',
                        type=np.float32,
                        default=0.98)
    parser.add_argument('--clip_return',
                        help='whether to clip return value',
                        type=str2bool,
                        default=True)
    parser.add_argument('--eps_act',
                        help='percentage of epsilon greedy exploration',
                        type=np.float32,
                        default=0.3)
    parser.add_argument(
        '--std_act',
        help='standard deviation of uncorrelated gaussian exploration',
        type=np.float32,
        default=0.2)

    parser.add_argument('--pi_lr',
                        help='learning rate of policy network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--q_lr',
                        help='learning rate of value network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--act_l2',
                        help='quadratic penalty on actions',
                        type=np.float32,
                        default=1.0)
    parser.add_argument(
        '--polyak',
        help='interpolation factor in polyak averaging for DDPG',
        type=np.float32,
        default=0.95)

    parser.add_argument('--epochs',
                        help='number of epochs',
                        type=np.int32,
                        default=20)
    parser.add_argument('--cycles',
                        help='number of cycles per epoch',
                        type=np.int32,
                        default=20)
    parser.add_argument('--episodes',
                        help='number of episodes per cycle',
                        type=np.int32,
                        default=50)
    parser.add_argument('--timesteps',
                        help='number of timesteps per episode',
                        type=np.int32,
                        default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches',
                        help='number of batches to train per episode',
                        type=np.int32,
                        default=20)

    parser.add_argument('--buffer_size',
                        help='number of episodes in replay buffer',
                        type=np.int32,
                        default=10000)
    parser.add_argument(
        '--buffer_type',
        help=
        'type of replay buffer / whether to use Energy-Based Prioritization',
        type=str,
        default='energy',
        choices=['normal', 'energy'])
    parser.add_argument('--batch_size',
                        help='size of sample batch',
                        type=np.int32,
                        default=256)
    parser.add_argument('--warmup',
                        help='number of timesteps for buffer warmup',
                        type=np.int32,
                        default=10000)
    parser.add_argument('--her',
                        help='type of hindsight experience replay',
                        type=str,
                        default='future',
                        choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio',
                        help='ratio of hindsight experience replay',
                        type=np.float32,
                        default=0.8)
    parser.add_argument('--pool_rule',
                        help='rule of collecting achieved states',
                        type=str,
                        default='full',
                        choices=['full', 'final'])

    parser.add_argument('--hgg_c',
                        help='weight of initial distribution in flow learner',
                        type=np.float32,
                        default=3.0)
    parser.add_argument('--hgg_L',
                        help='Lipschitz constant',
                        type=np.float32,
                        default=5.0)
    parser.add_argument('--hgg_pool_size',
                        help='size of achieved trajectories pool',
                        type=np.int32,
                        default=1000)

    parser.add_argument('--save_acc',
                        help='save success rate',
                        type=str2bool,
                        default=True)

    args = parser.parse_args()
    args.goal_based = (args.env in Robotics_envs_id)
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '': logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)

    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    return args
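
All of these examples pass str2bool as the argparse type for boolean flags; the helper itself is not shown on this page. A typical implementation of such a converter (an assumption about its shape, not the repository's actual code):

import argparse

def str2bool(v):
    # Accept common textual spellings so '--flag False' really yields False;
    # a plain type=bool would treat any non-empty string as True.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (v,))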
Example #5
def get_args():
    parser = get_arg_parser()

    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='ddpg', choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn', help='type of training method', type=str, default='hgg',
                        choices=learner_collection.keys())

    parser.add_argument('--env', help='gym env id', type=str, default='FetchReach-v1',
                        choices=Robotics_envs_id + Kuka_envs_id)
    args, _ = parser.parse_known_args()
    if args.env == 'HandReach-v0':
        parser.add_argument('--goal', help='method of goal generation', type=str, default='reach',
                            choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal', help='method of goal generation', type=str, default='interval',
                            choices=['vanilla', 'fixobj', 'interval', 'custom'])
        if args.env[:5] == 'Fetch':
            parser.add_argument('--init_offset', help='initial offset in fetch environments', type=np.float32,
                                default=1.0)
        elif args.env[:4] == 'Hand':
            parser.add_argument('--init_rotation', help='initial rotation in hand environments', type=np.float32,
                                default=0.25)
    parser.add_argument('--graph', help='g-hgg yes or no', type=str2bool, default=False)
    parser.add_argument('--route', help='use route to help hgg find target or not', type=str2bool, default=False)
    # route only for testing
    parser.add_argument('--show_goals', help='number of goals to show', type=np.int32, default=0)
    parser.add_argument('--play_path', help='path to meta_file directory for play', type=str, default=None)
    parser.add_argument('--play_epoch', help='epoch to play', type=str, default='latest')
    parser.add_argument('--stop_hgg_threshold',
                        help='threshold of goals inside goalspace, between 0 and 1, deactivated by default value 2!',
                        type=np.float32, default=2)

    parser.add_argument('--n_x', help='number of vertices in x-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_y', help='number of vertices in y-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_z', help='number of vertices in z-direction for g-hgg', type=int, default=11)

    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.98)
    parser.add_argument('--clip_return', help='whether to clip return value', type=str2bool, default=True)
    parser.add_argument('--eps_act', help='percentage of epsilon greedy exploration', type=np.float32, default=0.3)
    parser.add_argument('--std_act', help='standard deviation of uncorrelated gaussian exploration', type=np.float32,
                        default=0.2)

    parser.add_argument('--pi_lr', help='learning rate of policy network', type=np.float32, default=1e-3)
    parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=1e-3)
    parser.add_argument('--act_l2', help='quadratic penalty on actions', type=np.float32, default=1.0)
    parser.add_argument('--polyak', help='interpolation factor in polyak averaging for DDPG', type=np.float32,
                        default=0.95)

    parser.add_argument('--epoches', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=20)
    parser.add_argument('--episodes', help='number of episodes per cycle', type=np.int32, default=50)
    parser.add_argument('--timesteps', help='number of timesteps per episode', type=np.int32,
                        default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches', help='number of batches to train per episode', type=np.int32, default=20)
    parser.add_argument('--curriculum', type=str2bool, default=False)
    parser.add_argument('--buffer_size', help='number of episodes in replay buffer', type=np.int32, default=10000)
    parser.add_argument('--buffer_type', help='type of replay buffer / whether to use Energy-Based Prioritization',
                        type=str, default='energy', choices=['normal', 'energy'])
    parser.add_argument('--rhg', help='record hindsight goals in different learning stage or not', type=str2bool,
                        default=False)
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=256)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=10000)
    parser.add_argument('--her', help='type of hindsight experience replay', type=str, default='future',
                        choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio', help='ratio of hindsight experience replay', type=np.float32, default=0.8)
    parser.add_argument('--pool_rule', help='rule of collecting achieved states', type=str, default='full',
                        choices=['full', 'final'])

    parser.add_argument('--hgg_c', help='weight of initial distribution in flow learner', type=np.float32, default=3.0)
    parser.add_argument('--hgg_L', help='Lipschitz constant', type=np.float32, default=5.0)
    parser.add_argument('--hgg_pool_size', help='size of achieved trajectories pool', type=np.int32, default=1000)
    parser.add_argument('--balance_sigma', help='balance parameters', type=np.float32, default=0.3)
    parser.add_argument('--balance_eta', help='balance parameters', type=np.float32, default=1000)
    parser.add_argument('--record', help='record videos', type=str2bool, default=False)

    parser.add_argument('--save_acc', help='save success rate', type=str2bool, default=True)

    args = parser.parse_args()
    args.num_vertices = [args.n_x, args.n_y, args.n_z]
    args.goal_based = (args.env in (Robotics_envs_id + Kuka_envs_id))
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '': logger_name = args.tag + '-' + logger_name
    if args.graph:
        logger_name = logger_name + '-graph'
    if args.stop_hgg_threshold < 1:
        logger_name = logger_name + '-stop'
    if args.curriculum:
        logger_name = logger_name + '-curriculum'

    args.logger = get_logger(logger_name)

    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))
    return args
Example #6
def get_config():
    parser = get_arg_parser()
    parser.add_argument('--seed', help='random seed', type=int, default=0)

    ######## hardware configure
    parser.add_argument('--gpu',
                        help='which gpu the experiment is assigned to',
                        type=str,
                        default="0")

    ######## here for extension modules
    parser.add_argument('--sr',
                        help='whether to use Sibling Rivalry',
                        type=str2bool,
                        default=False)
    parser.add_argument('--goalgan',
                        help='whether to use goalgan',
                        type=str2bool,
                        default=False)
    parser.add_argument('--fgi',
                        help='relabel the goal with foresight goal inference',
                        type=str2bool,
                        default=False)
    parser.add_argument('--foresight_length',
                        help='foresight length',
                        type=int,
                        default=10)
    parser.add_argument('--goal_generator',
                        help='whether to use the goal generator',
                        type=str2bool,
                        default=False)
    parser.add_argument('--model_based_training',
                        help='whether to use model-based training',
                        type=str2bool,
                        default=False)
    parser.add_argument('--training_freq',
                        help='training times',
                        type=int,
                        default=10)
    parser.add_argument('--extend_length',
                        help='extend length',
                        type=int,
                        default=3)
    parser.add_argument('--her_before_fgi',
                        help='her before fgi',
                        type=str2bool,
                        default=True)
    parser.add_argument('--test_last_step',
                        help='whether to judge success using only the last step',
                        type=str2bool,
                        default=False)
    ######## here for env model configs
    parser.add_argument('--fake',
                        help='use env model',
                        type=str2bool,
                        default=False)
    parser.add_argument('--env_num_networks',
                        help='num networks',
                        type=int,
                        default=6)
    parser.add_argument('--env_elites', help='elites', type=int, default=3)
    parser.add_argument('--env_hidden',
                        help='env hidden',
                        type=int,
                        default=200)
    parser.add_argument('--distance_threshold',
                        help='distance_threshold',
                        type=float,
                        default=0.05)
    ########
    parser.add_argument('--tag',
                        help='terminal tag in logger',
                        type=str,
                        default='')
    parser.add_argument('--alg',
                        help='backend algorithm',
                        type=str,
                        default='ddpg',
                        choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn',
                        help='type of training method',
                        type=str,
                        default='hgg',
                        choices=learner_collection.keys())

    parser.add_argument(
        '--env', help='gym env id', type=str,
        default='FetchReach-v1')  # the env choices restriction was removed here
    args, _ = parser.parse_known_args()
    if args.env == 'HandReach-v0':
        parser.add_argument('--goal',
                            help='method of goal generation',
                            type=str,
                            default='reach',
                            choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal',
                            help='method of goal generation',
                            type=str,
                            default='interval',
                            choices=['vanilla', 'fixobj', 'interval'])
        if args.env[:5] == 'Fetch':
            parser.add_argument('--init_offset',
                                help='initial offset in fetch environments',
                                type=np.float32,
                                default=1.0)
        elif args.env[:4] == 'Hand':
            parser.add_argument('--init_rotation',
                                help='initial rotation in hand environments',
                                type=np.float32,
                                default=0.25)

    parser.add_argument('--gamma',
                        help='discount factor',
                        type=np.float32,
                        default=0.98)
    parser.add_argument('--clip_return',
                        help='whether to clip return value',
                        type=str2bool,
                        default=True)
    parser.add_argument('--eps_act',
                        help='percentage of epsilon greedy exploration',
                        type=np.float32,
                        default=0.3)
    parser.add_argument(
        '--std_act',
        help='standard deviation of uncorrelated gaussian exploration',
        type=np.float32,
        default=0.2)

    parser.add_argument('--pi_lr',
                        help='learning rate of policy network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--q_lr',
                        help='learning rate of value network',
                        type=np.float32,
                        default=1e-3)
    parser.add_argument('--act_l2',
                        help='quadratic penalty on actions',
                        type=np.float32,
                        default=1.0)
    parser.add_argument(
        '--polyak',
        help='interpolation factor in polyak averaging for DDPG',
        type=np.float32,
        default=0.95)

    parser.add_argument('--epoches',
                        help='number of epochs',
                        type=np.int32,
                        default=20)
    parser.add_argument('--cycles',
                        help='number of cycles per epoch',
                        type=np.int32,
                        default=15)
    parser.add_argument('--episodes',
                        help='number of episodes per cycle',
                        type=np.int32,
                        default=50)
    parser.add_argument('--timesteps',
                        help='number of timesteps per episode',
                        type=np.int32,
                        default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches',
                        help='number of batches to train per episode',
                        type=np.int32,
                        default=20)

    parser.add_argument('--buffer_size',
                        help='number of episodes in replay buffer',
                        type=np.int32,
                        default=10000)
    parser.add_argument(
        '--buffer_type',
        help=
        'type of replay buffer / whether to use Energy-Based Prioritization',
        type=str,
        default='normal',
        choices=['normal', 'energy'])
    parser.add_argument('--batch_size',
                        help='size of sample batch',
                        type=np.int32,
                        default=256)
    parser.add_argument('--warmup',
                        help='number of timesteps for buffer warmup',
                        type=np.int32,
                        default=10000)
    parser.add_argument('--her',
                        help='type of hindsight experience replay',
                        type=str,
                        default='future',
                        choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio',
                        help='ratio of hindsight experience replay',
                        type=np.float32,
                        default=0.8)
    parser.add_argument('--pool_rule',
                        help='rule of collecting achieved states',
                        type=str,
                        default='full',
                        choices=['full', 'final'])

    parser.add_argument('--hgg_c',
                        help='weight of initial distribution in flow learner',
                        type=np.float32,
                        default=3.0)
    parser.add_argument('--hgg_L',
                        help='Lipschitz constant',
                        type=np.float32,
                        default=5.0)
    parser.add_argument('--hgg_pool_size',
                        help='size of achieved trajectories pool',
                        type=np.int32,
                        default=1000)

    parser.add_argument('--save_acc',
                        help='save success rate',
                        type=str2bool,
                        default=True)

    args = parser.parse_args()

    # gpu visible setting
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    args.goal_based = True
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)

    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    # predefine the corresponding dims in different envs.
    env_alt = {
        'Mountaincar-v0': {
            'start_in_obs': 0,
            'end_in_obs': 1,
            'desire_dim': 1,
            'step_fake_param': 3,
            'env_model_obs_dim': 2,
            'env_model_act_dim': 1
        },
        'FetchPush-v1': {
            'start_in_obs': 3,
            'end_in_obs': 6,
            'desire_dim': 3,
            'step_fake_param': 29,
            'env_model_obs_dim': 25,
            'env_model_act_dim': 4
        },
        'World-v0': {
            'start_in_obs': 0,
            'end_in_obs': 2,
            'desire_dim': 2,
            'step_fake_param': 4,
            'env_model_obs_dim': 2,
            'env_model_act_dim': 2
        },
        'FetchReach-v1': {
            'start_in_obs': 0,
            'end_in_obs': 3,
            'desire_dim': 3,
            'step_fake_param': 14,
            'env_model_obs_dim': 10,
            'env_model_act_dim': 4
        },
        'Pendulum-v0': {
            'start_in_obs': 1,
            'end_in_obs': 3,
            'desire_dim': 2,
            'step_fake_param': 4,
            'env_model_obs_dim': 3,
            'env_model_act_dim': 1
        },
        'AntLocomotion-v0': {
            'start_in_obs': 0,
            'end_in_obs': 2,
            'desire_dim': 2,
            'step_fake_param': 37,
            'env_model_obs_dim': 29,
            'env_model_act_dim': 8
        },
        'AntLocomotionDiverse-v0': {
            'start_in_obs': 0,
            'end_in_obs': 2,
            'desire_dim': 2,
            'step_fake_param': 37,
            'env_model_obs_dim': 29,
            'env_model_act_dim': 8
        },
        'HalfCheetahGoal-v0': {
            'start_in_obs': 8,
            'end_in_obs': 9,
            'desire_dim': 1,
            'step_fake_param': 23,
            'env_model_obs_dim': 17,
            'env_model_act_dim': 6
        },
        'Reacher-v0': {
            'start_in_obs': 9,
            'end_in_obs': 11,
            'desire_dim': 2,
            'step_fake_param': 14,
            'env_model_obs_dim': 11,
            'env_model_act_dim': 3
        }
    }

    args.env_params = env_alt[args.env]

    args.model_loss_log_name = args.tag + time.strftime('-(%Y-%m-%d-%H:%M:%S)')

    return args
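
A short usage sketch of how the configuration from Example #6 might be consumed; only args.env_params and args.logger come from the code above, the surrounding driver is illustrative:

if __name__ == '__main__':
    args = get_config()
    # per-env dimensions for the environment model, looked up from env_alt
    obs_dim = args.env_params['env_model_obs_dim']  # 10 for FetchReach-v1
    act_dim = args.env_params['env_model_act_dim']  # 4 for FetchReach-v1
    args.logger.info('env model dims: obs {}, act {}'.format(obs_dim, act_dim))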