Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env',
        type=str,
        default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0',
            'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0',
            'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0',
            'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0',
            'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0',
            'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir',
        type=str,
        default='results',
        help=
        'Directory path to save output files. If it does not exist, it will be created.'
    )
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--logging-level',
                        type=int,
                        default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
    parser.add_argument(
        '--monitor',
        action='store_true',
        default=False,
        help=
        'Monitor env. Videos and additional information are saved as output files during evaluation.'
    )
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format,
                        level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(
        console_handler)  # add handler to the root logger
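    # Note: 'logger' and 'utils' are module-level helpers defined elsewhere in the original script.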

    logger.info('Output files are saved in {}'.format(args.outdir))

    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
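The logging setup shared by all of these examples is worth isolating: logging.basicConfig with a filename installs a file handler on the root logger, and a separate StreamHandler is then attached so the same records also reach the console. Below is a minimal, self-contained sketch of that pattern; the 'results' directory is a hypothetical stand-in for args.outdir.

import logging
import os

outdir = 'results'  # stand-in for args.outdir
os.makedirs(outdir, exist_ok=True)

log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'

# basicConfig with `filename` installs a FileHandler on the root logger.
logging.basicConfig(filename=os.path.join(outdir, 'log.txt'),
                    format=log_format,
                    level=logging.INFO)

# A StreamHandler added to the root logger mirrors the same records to stderr.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter(log_format))
logging.getLogger('').addHandler(console_handler)

logging.getLogger(__name__).info('written to both log.txt and the console')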
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env',
        type=str,
        default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0',
            'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0',
            'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0',
            'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0',
            'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0',
            'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir',
        type=str,
        default='results',
        help=
        'Directory path to save output files. If it does not exist, it will be created.'
    )
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--arch',
                        type=str,
                        default='nature',
                        choices=['nature'],
                        help='Network architecture to use.')
    # In the original paper, the agent runs in 8 environments in parallel and samples 128 steps per environment.
    # Here, 128 * 8 steps are sampled from a single environment instead.
    parser.add_argument('--update-interval',
                        type=int,
                        default=128 * 8,
                        help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--logging-level',
                        type=int,
                        default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
    parser.add_argument('--gray-scale',
                        action='store_true',
                        default=False,
                        help='Convert pov into a grayscale image.')
    parser.add_argument(
        '--monitor',
        action='store_true',
        default=False,
        help=
        'Monitor env. Videos and additional information are saved as output files during evaluation.'
    )
    parser.add_argument('--lr',
                        type=float,
                        default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--adam-eps',
                        type=float,
                        default=1e-8,
                        help='Epsilon for Adam.')
    parser.add_argument('--frame-stack',
                        type=int,
                        default=None,
                        help='Number of frames stacked (None for disable).')
    parser.add_argument('--frame-skip',
                        type=int,
                        default=None,
                        help='Number of frames skipped (None for disable).')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        help='Discount rate.')
    parser.add_argument(
        '--epochs',
        type=int,
        default=3,
        help='Number of epochs to update model for per PPO iteration.')
    parser.add_argument('--standardize-advantages',
                        action='store_true',
                        default=False,
                        help='Use standardized advantages on updates for PPO')
    parser.add_argument(
        '--disable-action-prior',
        action='store_true',
        default=False,
        help=
        'If specified, action_space shaping based on prior knowledge will be disabled.'
    )
    parser.add_argument(
        '--always-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should always be pressed throughout interaction with the environment.'
    )
    parser.add_argument(
        '--reverse-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should always be pressed but can be turned off via an action.'
    )
    parser.add_argument(
        '--exclude-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should be ignored when discretizing the action space.'
    )
    parser.add_argument(
        '--exclude-noop',
        action='store_true',
        default=False,
        help='Exclude the "noop" action from the discrete action list.')
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format,
                        level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(
        console_handler)  # add handler to the root logger

    logger.info('Output files are saved in {}'.format(args.outdir))

    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
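Example #2 parses PPO hyperparameters (--lr, --adam-eps, --update-interval, --epochs, --gamma, --standardize-advantages), but the actual wiring happens inside _main, which is not shown. The sketch below illustrates how such arguments typically map onto chainerrl's PPO constructor; make_ppo_agent is a hypothetical helper and `model` stands for an actor-critic network supplied by the caller, so this is an illustration of the plumbing, not the repository's actual _main.

import chainer
import chainerrl

def make_ppo_agent(model, args):
    # Adam configured from the parsed --lr and --adam-eps.
    opt = chainer.optimizers.Adam(alpha=args.lr, eps=args.adam_eps)
    opt.setup(model)
    if args.weight_decay > 0:
        # --weight-decay maps onto a Chainer optimizer hook.
        opt.add_hook(chainer.optimizer_hooks.WeightDecay(args.weight_decay))
    # chainerrl.agents.PPO accepts these hyperparameters directly.
    return chainerrl.agents.PPO(
        model,
        opt,
        gpu=args.gpu,
        gamma=args.gamma,
        update_interval=args.update_interval,
        epochs=args.epochs,
        standardize_advantages=args.standardize_advantages,
    )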
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env',
        type=str,
        default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0',
            'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0',
            'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0',
            'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0',
            'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0',
            'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir',
        type=str,
        default='results',
        help=
        'Directory path to save output files. If it does not exist, it will be created.'
    )
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument(
        '--final-exploration-frames',
        type=int,
        default=10**6,
        help='Timesteps after which we stop annealing the exploration rate.')
    parser.add_argument('--final-epsilon',
                        type=float,
                        default=0.01,
                        help='Final value of epsilon during training.')
    parser.add_argument('--eval-epsilon',
                        type=float,
                        default=0.001,
                        help='Exploration epsilon used during eval episodes.')
    parser.add_argument(
        '--noisy-net-sigma',
        type=float,
        default=None,
        help='NoisyNet explorer switch. This disables the following options: '
        '--final-exploration-frames, --final-epsilon, --eval-epsilon')
    parser.add_argument('--arch',
                        type=str,
                        default='dueling',
                        choices=['dueling', 'distributed_dueling'],
                        help='Network architecture to use.')
    parser.add_argument('--replay-capacity',
                        type=int,
                        default=10**6,
                        help='Maximum capacity for replay buffer.')
    parser.add_argument(
        '--replay-start-size',
        type=int,
        default=5 * 10**4,
        help='Minimum replay buffer size before performing gradient updates.')
    parser.add_argument(
        '--target-update-interval',
        type=int,
        default=3 * 10**4,
        help='Frequency (in timesteps) at which the target network is updated.'
    )
    parser.add_argument('--update-interval',
                        type=int,
                        default=4,
                        help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--no-clip-delta',
                        dest='clip_delta',
                        action='store_false')
    parser.set_defaults(clip_delta=True)
    parser.add_argument('--num-step-return', type=int, default=1)
    parser.add_argument(
        '--agent',
        type=str,
        default='DQN',
        choices=['DQN', 'DoubleDQN', 'PAL', 'CategoricalDoubleDQN'])
    parser.add_argument('--logging-level',
                        type=int,
                        default=20,
                        help='Logging level. 10:DEBUG, 20:INFO etc.')
    parser.add_argument('--gray-scale',
                        action='store_true',
                        default=False,
                        help='Convert pov into a grayscale image.')
    parser.add_argument(
        '--monitor',
        action='store_true',
        default=False,
        help=
        'Monitor env. Videos and additional information are saved as output files during evaluation.'
    )
    parser.add_argument('--lr',
                        type=float,
                        default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--adam-eps',
                        type=float,
                        default=1e-8,
                        help='Epsilon for Adam.')
    parser.add_argument('--prioritized',
                        action='store_true',
                        default=False,
                        help='Use prioritized experience replay.')
    parser.add_argument('--frame-stack',
                        type=int,
                        default=None,
                        help='Number of frames stacked (None for disable).')
    parser.add_argument('--frame-skip',
                        type=int,
                        default=None,
                        help='Number of frames skipped (None for disable).')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.99,
                        help='Discount rate.')
    parser.add_argument('--batch-accumulator',
                        type=str,
                        default='sum',
                        choices=['sum', 'mean'],
                        help='Accumulator for batch loss.')
    parser.add_argument(
        '--disable-action-prior',
        action='store_true',
        default=False,
        help=
        'If specified, action_space shaping based on prior knowledge will be disabled.'
    )
    parser.add_argument(
        '--always-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should always be pressed throughout interaction with the environment.'
    )
    parser.add_argument(
        '--reverse-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should always be pressed but can be turned off via an action.'
    )
    parser.add_argument(
        '--exclude-keys',
        type=str,
        default=None,
        nargs='*',
        help=
        'List of action keys that should be ignored when discretizing the action space.'
    )
    parser.add_argument(
        '--exclude-noop',
        action='store_true',
        default=False,
        help='Exclude the "noop" action from the discrete action list.')
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format,
                        level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(
        console_handler)  # add handler to the root logger

    logger.info('Output files are saved in {}'.format(args.outdir))

    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
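Example #3 adds the DQN-family knobs: an annealed epsilon-greedy schedule (--final-epsilon, --final-exploration-frames), a NoisyNet switch that supersedes it, and a replay buffer that can be prioritized and n-step. A hedged sketch of how these arguments typically translate into chainerrl objects follows; make_explorer_and_buffer is a hypothetical helper and action_space is assumed to be the env's discrete action space.

import chainerrl

def make_explorer_and_buffer(args, action_space):
    if args.noisy_net_sigma is not None:
        # With NoisyNet, exploration comes from the noisy layers themselves,
        # so the explorer can simply act greedily.
        explorer = chainerrl.explorers.Greedy()
    else:
        # Linearly anneal epsilon from 1.0 down to --final-epsilon over
        # --final-exploration-frames timesteps.
        explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
            1.0, args.final_epsilon, args.final_exploration_frames,
            action_space.sample)
    if args.prioritized:
        rbuf = chainerrl.replay_buffer.PrioritizedReplayBuffer(
            args.replay_capacity, num_steps=args.num_step_return)
    else:
        rbuf = chainerrl.replay_buffer.ReplayBuffer(
            args.replay_capacity, num_steps=args.num_step_return)
    return explorer, rbuf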
Example #4
def main():
    parser = argparse.ArgumentParser()

    env_choices = [
        # basic envs
        'MineRLTreechop-v0',
        'MineRLNavigate-v0', 'MineRLNavigateDense-v0', 'MineRLNavigateExtreme-v0', 'MineRLNavigateExtremeDense-v0',
        'MineRLObtainIronPickaxe-v0', 'MineRLObtainIronPickaxeDense-v0',
        'MineRLObtainDiamond-v0', 'MineRLObtainDiamondDense-v0',
        # obfuscated envs
        'MineRLTreechopVectorObf-v0',
        'MineRLNavigateVectorObf-v0', 'MineRLNavigateExtremeVectorObf-v0',
        # MineRL data pipeline fails for these envs: https://github.com/minerllabs/minerl/issues/364
        # 'MineRLNavigateDenseVectorObf-v0', 'MineRLNavigateExtremeDenseVectorObf-v0',
        'MineRLObtainDiamondVectorObf-v0', 'MineRLObtainDiamondDenseVectorObf-v0',
        'MineRLObtainIronPickaxeVectorObf-v0', 'MineRLObtainIronPickaxeDenseVectorObf-v0',
        # for debugging
        'MineRLNavigateDenseFixed-v0', 'MineRLObtainTest-v0',
    ]
    parser.add_argument('--env', type=str, choices=env_choices, required=True,
                        help='MineRL environment identifier.')

    # meta settings
    parser.add_argument('--outdir', type=str, default='results',
                        help='Directory path to save output files. If it does not exist, it will be created.')
    parser.add_argument('--seed', type=int, default=0, help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0, help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--logging-level', type=int, default=20, help='Logging level. 10:DEBUG, 20:INFO etc.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--monitor', action='store_true', default=False,
                        help='Monitor env. Videos and additional information are saved as output files during evaluation.')

    # training scheme (agent)
    parser.add_argument('--agent', type=str, default='DQN', choices=['DQN', 'DoubleDQN', 'PAL', 'CategoricalDoubleDQN'])

    # network architecture
    parser.add_argument('--arch', type=str, default='dueling', choices=['dueling', 'distributed_dueling'],
                        help='Network architecture to use.')

    # update rule settings
    parser.add_argument('--update-interval', type=int, default=4, help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--frame-skip', type=int, default=None, help='Number of frames skipped (None for disable).')
    parser.add_argument('--gamma', type=float, default=0.99, help='Discount rate.')
    parser.add_argument('--no-clip-delta', dest='clip_delta', action='store_false')
    parser.set_defaults(clip_delta=True)
    parser.add_argument('--num-step-return', type=int, default=1)
    parser.add_argument('--lr', type=float, default=2.5e-4, help='Learning rate.')
    parser.add_argument('--adam-eps', type=float, default=1e-8, help='Epsilon for Adam.')
    parser.add_argument('--batch-accumulator', type=str, default='sum', choices=['sum', 'mean'], help='Accumulator for batch loss.')

    # observation conversion related settings
    parser.add_argument('--gray-scale', action='store_true', default=False, help='Convert pov into a grayscale image.')
    parser.add_argument('--frame-stack', type=int, default=None, help='Number of frames stacked (None for disable).')

    # exploration related settings
    parser.add_argument('--final-exploration-frames', type=int, default=10 ** 6,
                        help='Timesteps after which we stop annealing the exploration rate.')
    parser.add_argument('--final-epsilon', type=float, default=0.01, help='Final value of epsilon during training.')
    parser.add_argument('--eval-epsilon', type=float, default=0.001, help='Exploration epsilon used during eval episodes.')
    parser.add_argument('--noisy-net-sigma', type=float, default=None,
                        help='NoisyNet explorer switch. This disables the following options: '
                        '--final-exploration-frames, --final-epsilon, --eval-epsilon')

    # experience replay buffer related settings
    parser.add_argument('--replay-capacity', type=int, default=10 ** 6, help='Maximum capacity for replay buffer.')
    parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4,
                        help='Minimum replay buffer size before performing gradient updates.')
    parser.add_argument('--prioritized', action='store_true', default=False, help='Use prioritized experience replay.')

    # target network related settings
    parser.add_argument('--target-update-interval', type=int, default=3 * 10 ** 4,
                        help='Frequency (in timesteps) at which the target network is updated.')

    # K-means related settings
    parser.add_argument('--kmeans-n-clusters', type=int, default=30, help='#clusters for K-means')

    args = parser.parse_args()

    args.outdir = pfrl.experiments.prepare_output_dir(args, args.outdir)

    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'), format=log_format, level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(console_handler)  # add handler to the root logger

    logger.info('Output files will be saved in {}'.format(args.outdir))

    utils.log_versions()

    try:
        dqn_family(
            # meta settings
            env_id=args.env,
            outdir=args.outdir,
            seed=args.seed,
            gpu=args.gpu,
            demo=args.demo,
            load=args.load,
            eval_n_runs=args.eval_n_runs,
            monitor=args.monitor,
            # hyper params
            agent_type=args.agent,
            arch=args.arch,
            update_interval=args.update_interval,
            frame_skip=args.frame_skip,
            gamma=args.gamma,
            clip_delta=args.clip_delta,
            num_step_return=args.num_step_return,
            lr=args.lr,
            adam_eps=args.adam_eps,
            batch_accumulator=args.batch_accumulator,
            gray_scale=args.gray_scale,
            frame_stack=args.frame_stack,
            final_exploration_frames=args.final_exploration_frames,
            final_epsilon=args.final_epsilon,
            eval_epsilon=args.eval_epsilon,
            noisy_net_sigma=args.noisy_net_sigma,
            replay_capacity=args.replay_capacity,
            replay_start_size=args.replay_start_size,
            prioritized=args.prioritized,
            target_update_interval=args.target_update_interval,
            kmeans_n_clusters=args.kmeans_n_clusters,
        )
    except:  # noqa
        logger.exception('execution failed.')
        raise
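Example #4 is the pfrl-based variant: it supports the obfuscated *VectorObf environments, whose actions are continuous 64-dimensional vectors, and adds --kmeans-n-clusters accordingly. The idea is to run K-means over action vectors from the human demonstration data and let the discrete agent choose among the resulting cluster centers. The sketch below shows that discretization with scikit-learn on random stand-in vectors; the real dqn_family pipeline would fit K-means on actions loaded from the MineRL dataset.

import numpy as np
from sklearn.cluster import KMeans

# Stand-in for action vectors extracted from MineRL demonstrations;
# the obfuscated envs use 64-dimensional continuous action vectors.
dataset_actions = np.random.randn(10000, 64).astype(np.float32)

# Each of the 30 cluster centers becomes one discrete action.
kmeans = KMeans(n_clusters=30, random_state=0).fit(dataset_actions)

def discrete_to_env_action(action_index):
    # Map the agent's discrete choice back to a continuous env action.
    return {'vector': kmeans.cluster_centers_[action_index]}

print(discrete_to_env_action(0)['vector'].shape)  # -> (64,)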