# Note: each main() below is excerpted from its own script and assumes
# module-level imports (argparse, os, logging, and chainerrl or pfrl),
# a module-level `logger`, and helpers such as utils.log_versions() and
# _main() / dqn_family() defined elsewhere in that script.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env', type=str, default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0', 'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0', 'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0', 'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0', 'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0', 'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir', type=str, default='results',
        help='Directory path to save output files. If it does not exist, it will be created.')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO, etc.')
    parser.add_argument(
        '--monitor', action='store_true', default=False,
        help='Monitor env. Videos and additional information are saved as output files during evaluation.')
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    # Log to both a file in the output directory and the console.
    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format, level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(console_handler)  # add handler to the root logger

    logger.info('Output files are saved in {}'.format(args.outdir))
    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
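# Each main() variant presumably lives in its own standalone script; a
# minimal, conventional entry-point sketch (the invocation below is
# illustrative, not taken from the repository):
if __name__ == '__main__':
    # e.g. python <script>.py --env MineRLTreechop-v0 --outdir results --monitor
    main()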
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env', type=str, default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0', 'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0', 'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0', 'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0', 'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0', 'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir', type=str, default='results',
        help='Directory path to save output files. If it does not exist, it will be created.')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--arch', type=str, default='nature', choices=['nature'],
                        help='Network architecture to use.')
    # In the original paper, the agent runs in 8 environments in parallel
    # and samples 128 steps per environment. Here we sample 128 * 8 steps
    # from a single environment instead.
    parser.add_argument('--update-interval', type=int, default=128 * 8,
                        help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--weight-decay', type=float, default=0.0)
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO, etc.')
    parser.add_argument('--gray-scale', action='store_true', default=False,
                        help='Convert pov into a grayscale image.')
    parser.add_argument(
        '--monitor', action='store_true', default=False,
        help='Monitor env. Videos and additional information are saved as output files during evaluation.')
    parser.add_argument('--lr', type=float, default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--adam-eps', type=float, default=1e-8,
                        help='Epsilon for Adam.')
    parser.add_argument('--frame-stack', type=int, default=None,
                        help='Number of frames stacked (None to disable).')
    parser.add_argument('--frame-skip', type=int, default=None,
                        help='Number of frames skipped (None to disable).')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='Discount rate.')
    parser.add_argument('--epochs', type=int, default=3,
                        help='Number of epochs to update the model for per PPO iteration.')
    parser.add_argument('--standardize-advantages', action='store_true', default=False,
                        help='Use standardized advantages on PPO updates.')
    parser.add_argument(
        '--disable-action-prior', action='store_true', default=False,
        help='If specified, action_space shaping based on prior knowledge will be disabled.')
    parser.add_argument(
        '--always-keys', type=str, default=None, nargs='*',
        help='List of action keys that should always be pressed throughout interaction with the environment.')
    parser.add_argument(
        '--reverse-keys', type=str, default=None, nargs='*',
        help='List of action keys that should always be pressed but can be turned off via action.')
    parser.add_argument(
        '--exclude-keys', type=str, default=None, nargs='*',
        help='List of action keys that should be ignored when discretizing the action space.')
    parser.add_argument(
        '--exclude-noop', action='store_true', default=False,
        help='The "noop" will be excluded from the discrete action list.')
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    # Log to both a file in the output directory and the console.
    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format, level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(console_handler)  # add handler to the root logger

    logger.info('Output files are saved in {}'.format(args.outdir))
    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
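# A rough sketch of how _main presumably forwards the PPO flags above to
# chainerrl's PPO agent. `model` and `opt` are assumed to be constructed
# elsewhere, so this is illustrative rather than the repository's code:
def make_ppo_agent(model, opt, args):
    return chainerrl.agents.PPO(
        model, opt,
        gpu=args.gpu,
        gamma=args.gamma,
        update_interval=args.update_interval,  # 128 steps x 8 envs worth of samples
        epochs=args.epochs,
        standardize_advantages=args.standardize_advantages,
    )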
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env', type=str, default='MineRLTreechop-v0',
        choices=[
            'MineRLTreechop-v0',
            'MineRLNavigate-v0', 'MineRLNavigateDense-v0',
            'MineRLNavigateExtreme-v0', 'MineRLNavigateExtremeDense-v0',
            'MineRLObtainIronPickaxe-v0', 'MineRLObtainIronPickaxeDense-v0',
            'MineRLObtainDiamond-v0', 'MineRLObtainDiamondDense-v0',
            # for debug use
            'MineRLNavigateDenseFixed-v0', 'MineRLObtainTest-v0',
        ],
        help='MineRL environment identifier.')
    parser.add_argument(
        '--outdir', type=str, default='results',
        help='Directory path to save output files. If it does not exist, it will be created.')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--final-exploration-frames', type=int, default=10 ** 6,
                        help='Timesteps after which we stop annealing the exploration rate.')
    parser.add_argument('--final-epsilon', type=float, default=0.01,
                        help='Final value of epsilon during training.')
    parser.add_argument('--eval-epsilon', type=float, default=0.001,
                        help='Exploration epsilon used during eval episodes.')
    parser.add_argument('--noisy-net-sigma', type=float, default=None,
                        help='NoisyNet explorer switch. This disables the following options: '
                             '--final-exploration-frames, --final-epsilon, --eval-epsilon')
    parser.add_argument('--arch', type=str, default='dueling',
                        choices=['dueling', 'distributed_dueling'],
                        help='Network architecture to use.')
    parser.add_argument('--replay-capacity', type=int, default=10 ** 6,
                        help='Maximum capacity of the replay buffer.')
    parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4,
                        help='Minimum replay buffer size before performing gradient updates.')
    parser.add_argument('--target-update-interval', type=int, default=3 * 10 ** 4,
                        help='Frequency (in timesteps) at which the target network is updated.')
    parser.add_argument('--update-interval', type=int, default=4,
                        help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--no-clip-delta', dest='clip_delta', action='store_false')
    parser.set_defaults(clip_delta=True)
    parser.add_argument('--num-step-return', type=int, default=1)
    parser.add_argument('--agent', type=str, default='DQN',
                        choices=['DQN', 'DoubleDQN', 'PAL', 'CategoricalDoubleDQN'])
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO, etc.')
    parser.add_argument('--gray-scale', action='store_true', default=False,
                        help='Convert pov into a grayscale image.')
    parser.add_argument(
        '--monitor', action='store_true', default=False,
        help='Monitor env. Videos and additional information are saved as output files during evaluation.')
    parser.add_argument('--lr', type=float, default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--adam-eps', type=float, default=1e-8,
                        help='Epsilon for Adam.')
    parser.add_argument('--prioritized', action='store_true', default=False,
                        help='Use prioritized experience replay.')
    parser.add_argument('--frame-stack', type=int, default=None,
                        help='Number of frames stacked (None to disable).')
    parser.add_argument('--frame-skip', type=int, default=None,
                        help='Number of frames skipped (None to disable).')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='Discount rate.')
    parser.add_argument('--batch-accumulator', type=str, default='sum',
                        choices=['sum', 'mean'],
                        help='Accumulator for batch loss.')
    parser.add_argument(
        '--disable-action-prior', action='store_true', default=False,
        help='If specified, action_space shaping based on prior knowledge will be disabled.')
    parser.add_argument(
        '--always-keys', type=str, default=None, nargs='*',
        help='List of action keys that should always be pressed throughout interaction with the environment.')
    parser.add_argument(
        '--reverse-keys', type=str, default=None, nargs='*',
        help='List of action keys that should always be pressed but can be turned off via action.')
    parser.add_argument(
        '--exclude-keys', type=str, default=None, nargs='*',
        help='List of action keys that should be ignored when discretizing the action space.')
    parser.add_argument(
        '--exclude-noop', action='store_true', default=False,
        help='The "noop" will be excluded from the discrete action list.')
    args = parser.parse_args()

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)

    # Log to both a file in the output directory and the console.
    import logging
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format, level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(console_handler)  # add handler to the root logger

    logger.info('Output files are saved in {}'.format(args.outdir))
    utils.log_versions()

    try:
        _main(args)
    except:  # noqa
        logger.exception('execution failed.')
        raise
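# A hedged sketch of how the exploration flags above are conventionally
# consumed in chainerrl-based DQN scripts. `q_func` and `n_actions` are
# assumed names; this is not necessarily the repository's _main:
import numpy as np

def make_explorer(q_func, n_actions, args):
    if args.noisy_net_sigma is not None:
        # NoisyNet provides its own exploration, so epsilon-greedy
        # annealing is disabled and the agent acts greedily.
        chainerrl.links.to_factorized_noisy(q_func, sigma_scale=args.noisy_net_sigma)
        return chainerrl.explorers.Greedy()
    # Otherwise anneal epsilon from 1.0 to --final-epsilon over
    # --final-exploration-frames timesteps.
    return chainerrl.explorers.LinearDecayEpsilonGreedy(
        1.0, args.final_epsilon, args.final_exploration_frames,
        lambda: np.random.randint(n_actions))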
def main():
    parser = argparse.ArgumentParser()

    env_choices = [
        # basic envs
        'MineRLTreechop-v0',
        'MineRLNavigate-v0', 'MineRLNavigateDense-v0',
        'MineRLNavigateExtreme-v0', 'MineRLNavigateExtremeDense-v0',
        'MineRLObtainIronPickaxe-v0', 'MineRLObtainIronPickaxeDense-v0',
        'MineRLObtainDiamond-v0', 'MineRLObtainDiamondDense-v0',
        # obfuscated envs
        'MineRLTreechopVectorObf-v0',
        'MineRLNavigateVectorObf-v0',
        'MineRLNavigateExtremeVectorObf-v0',
        # MineRL data pipeline fails for these envs:
        # https://github.com/minerllabs/minerl/issues/364
        # 'MineRLNavigateDenseVectorObf-v0', 'MineRLNavigateExtremeDenseVectorObf-v0',
        'MineRLObtainDiamondVectorObf-v0', 'MineRLObtainDiamondDenseVectorObf-v0',
        'MineRLObtainIronPickaxeVectorObf-v0', 'MineRLObtainIronPickaxeDenseVectorObf-v0',
        # for debugging
        'MineRLNavigateDenseFixed-v0', 'MineRLObtainTest-v0',
    ]
    parser.add_argument('--env', type=str, choices=env_choices, required=True,
                        help='MineRL environment identifier.')

    # meta settings
    parser.add_argument('--outdir', type=str, default='results',
                        help='Directory path to save output files. If it does not exist, it will be created.')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed [0, 2 ** 31)')
    parser.add_argument('--gpu', type=int, default=0,
                        help='GPU to use, set to -1 if no GPU.')
    parser.add_argument('--demo', action='store_true', default=False)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--logging-level', type=int, default=20,
                        help='Logging level. 10:DEBUG, 20:INFO, etc.')
    parser.add_argument('--eval-n-runs', type=int, default=3)
    parser.add_argument('--monitor', action='store_true', default=False,
                        help='Monitor env. Videos and additional information are saved as output files during evaluation.')

    # training scheme (agent)
    parser.add_argument('--agent', type=str, default='DQN',
                        choices=['DQN', 'DoubleDQN', 'PAL', 'CategoricalDoubleDQN'])

    # network architecture
    parser.add_argument('--arch', type=str, default='dueling',
                        choices=['dueling', 'distributed_dueling'],
                        help='Network architecture to use.')

    # update rule settings
    parser.add_argument('--update-interval', type=int, default=4,
                        help='Frequency (in timesteps) of network updates.')
    parser.add_argument('--frame-skip', type=int, default=None,
                        help='Number of frames skipped (None to disable).')
    parser.add_argument('--gamma', type=float, default=0.99,
                        help='Discount rate.')
    parser.add_argument('--no-clip-delta', dest='clip_delta', action='store_false')
    parser.set_defaults(clip_delta=True)
    parser.add_argument('--num-step-return', type=int, default=1)
    parser.add_argument('--lr', type=float, default=2.5e-4,
                        help='Learning rate.')
    parser.add_argument('--adam-eps', type=float, default=1e-8,
                        help='Epsilon for Adam.')
    parser.add_argument('--batch-accumulator', type=str, default='sum',
                        choices=['sum', 'mean'],
                        help='Accumulator for batch loss.')

    # observation conversion related settings
    parser.add_argument('--gray-scale', action='store_true', default=False,
                        help='Convert pov into a grayscale image.')
    parser.add_argument('--frame-stack', type=int, default=None,
                        help='Number of frames stacked (None to disable).')

    # exploration related settings
    parser.add_argument('--final-exploration-frames', type=int, default=10 ** 6,
                        help='Timesteps after which we stop annealing the exploration rate.')
    parser.add_argument('--final-epsilon', type=float, default=0.01,
                        help='Final value of epsilon during training.')
    parser.add_argument('--eval-epsilon', type=float, default=0.001,
                        help='Exploration epsilon used during eval episodes.')
    parser.add_argument('--noisy-net-sigma', type=float, default=None,
                        help='NoisyNet explorer switch. This disables the following options: '
                             '--final-exploration-frames, --final-epsilon, --eval-epsilon')

    # experience replay buffer related settings
    parser.add_argument('--replay-capacity', type=int, default=10 ** 6,
                        help='Maximum capacity of the replay buffer.')
    parser.add_argument('--replay-start-size', type=int, default=5 * 10 ** 4,
                        help='Minimum replay buffer size before performing gradient updates.')
    parser.add_argument('--prioritized', action='store_true', default=False,
                        help='Use prioritized experience replay.')

    # target network related settings
    parser.add_argument('--target-update-interval', type=int, default=3 * 10 ** 4,
                        help='Frequency (in timesteps) at which the target network is updated.')

    # K-means related settings
    parser.add_argument('--kmeans-n-clusters', type=int, default=30,
                        help='#clusters for K-means')

    args = parser.parse_args()

    args.outdir = pfrl.experiments.prepare_output_dir(args, args.outdir)

    # Log to both a file in the output directory and the console.
    log_format = '%(levelname)-8s - %(asctime)s - [%(name)s %(funcName)s %(lineno)d] %(message)s'
    logging.basicConfig(filename=os.path.join(args.outdir, 'log.txt'),
                        format=log_format, level=args.logging_level)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(args.logging_level)
    console_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger('').addHandler(console_handler)  # add handler to the root logger

    logger.info('Output files will be saved in {}'.format(args.outdir))
    utils.log_versions()

    try:
        dqn_family(
            # meta settings
            env_id=args.env,
            outdir=args.outdir,
            seed=args.seed,
            gpu=args.gpu,
            demo=args.demo,
            load=args.load,
            eval_n_runs=args.eval_n_runs,
            monitor=args.monitor,
            # hyper params
            agent_type=args.agent,
            arch=args.arch,
            update_interval=args.update_interval,
            frame_skip=args.frame_skip,
            gamma=args.gamma,
            clip_delta=args.clip_delta,
            num_step_return=args.num_step_return,
            lr=args.lr,
            adam_eps=args.adam_eps,
            batch_accumulator=args.batch_accumulator,
            gray_scale=args.gray_scale,
            frame_stack=args.frame_stack,
            final_exploration_frames=args.final_exploration_frames,
            final_epsilon=args.final_epsilon,
            eval_epsilon=args.eval_epsilon,
            noisy_net_sigma=args.noisy_net_sigma,
            replay_capacity=args.replay_capacity,
            replay_start_size=args.replay_start_size,
            prioritized=args.prioritized,
            target_update_interval=args.target_update_interval,
            kmeans_n_clusters=args.kmeans_n_clusters,
        )
    except:  # noqa
        logger.exception('execution failed.')
        raise
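# A rough sketch of the K-means action discretization that
# --kmeans-n-clusters controls: the continuous `vector` actions in the
# obfuscated envs' demonstration data are clustered, and each centroid
# then serves as one discrete action. Names and data layout follow
# minerl's public data API; the repository's own helper may differ.
import numpy as np
from sklearn.cluster import KMeans
import minerl

def cluster_demo_actions(env_id, data_dir, n_clusters, seed):
    # Gather all demonstration action vectors for this env, then cluster.
    data = minerl.data.make(env_id, data_dir=data_dir)
    acts = []
    for name in data.get_trajectory_names():
        for _, action, _, _, _ in data.load_data(name):
            acts.append(action['vector'])
    return KMeans(n_clusters=n_clusters, random_state=seed).fit(np.array(acts))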