# Example 1
def run():
    """Parse CLI args, show the generated training commands, and — unless in
    dry-run mode — execute them, optionally wiping an existing log directory.

    Side effects: may delete ``args.log`` recursively and runs the generated
    shell commands via ``os.system``.
    """
    args = parser.parse_args()
    args.gpu = util.parse_to_num(args.gpu)
    cmds, notes = create_commands("e", args)
    if args.dry_run:
        print(
            "Dry-run mode due to -d flag, otherwise the following commands would be executed:"
        )
    else:
        print("Executing the following commands:")
    print("\n".join(cmds))
    print("")
    if args.dry_run:
        return
    if args.mode == "tmux":
        # Clear TMUX so a new tmux session can be started from inside one.
        os.environ["TMUX"] = ""
    path = os.path.join(os.getcwd(), args.log)
    if os.path.exists(path):
        # `input` replaces the Python-2-only `raw_input` (the rest of this
        # codebase uses f-strings, i.e. Python 3).
        # NOTE: any answer other than exactly 'n' — including plain Enter —
        # is treated as "yes, delete".
        key = input("%s exists. Do you want to delete it? (y/n): " % path)
        if key == 'n':
            return
        os.system("rm -rf %s" % path)
    os.system("\n".join(cmds))
    print('\n'.join(notes))
# Example 2
def run():
    """Restore an agent (A3C / Q / VPN) from a checkpoint and print the
    evaluation reward over ``args.n_play`` episodes."""
    args = parser.parse_args()
    args.task = 0
    # CLI values arrive as comma-separated strings; convert to numeric lists.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)

    env = new_env(args)
    args.meta_dim = 0 if not hasattr(env, 'meta') else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    # Cap per-process GPU memory so several evaluators can share one card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        # Map the algorithm name onto the model-type string the agent expects.
        if args.alg == 'A3C':
            model_type = 'policy'
        elif args.alg == 'Q':
            model_type = 'q'
        elif args.alg == 'VPN':
            model_type = 'vpn'
        else:
            raise ValueError('Invalid algorithm: ' + args.alg)
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                # getattr replaces eval("model." + args.model): identical
                # lookup for any valid class name, but a CLI-supplied string
                # can no longer execute arbitrary code.
                agent = getattr(model, args.model)(
                    env.observation_space.shape,
                    env.action_space.n,
                    type=model_type,
                    gamma=args.gamma,
                    dim=args.dim,
                    f_num=args.f_num,
                    f_stride=args.f_stride,
                    f_size=args.f_size,
                    f_pad=args.f_pad,
                    branch=args.branch,
                    meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)

            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        reward = evaluate(env, agent, args.n_play, eps=args.eps)
        print("Reward: %.2f" % (reward))
def run():
    """Restore a VPN agent from a checkpoint and run evaluation episodes."""
    args = parser.parse_args()
    args.task = 0
    # CLI options arrive as comma-separated strings; turn each into a list.
    for attr in ('f_num', 'f_stride', 'f_size', 'branch'):
        setattr(args, attr, util.parse_to_num(getattr(args, attr)))

    env = new_env(args)
    args.meta_dim = 0

    if args.gpu > 0:
        device = '/gpu:0'
    else:
        device = '/cpu:0'
    # Cap per-process GPU memory so several evaluators can share one card.
    session_config = tf.ConfigProto(
        device_filters=device,
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.2),
        allow_soft_placement=True)

    with tf.Session(config=session_config) as sess:
        model_type = 'vpn'
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                model_cls = eval("model." + args.model)
                agent = model_cls(env.observation_space.shape,
                                  env.action_space.n,
                                  type=model_type,
                                  gamma=args.gamma,
                                  dim=args.dim,
                                  f_num=args.f_num,
                                  f_stride=args.f_stride,
                                  f_size=args.f_size,
                                  f_pad=args.f_pad,
                                  branch=args.branch,
                                  meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)

            saver = tf.train.Saver()
            saver.restore(sess, args.checkpoint)
        np.random.seed(args.seed)
        evaluate(env, agent, args.n_play, eps=args.eps)
# Example 4
def main(_):
    """Set up TensorFlow for data-parallel work.

    Parses the cluster/worker configuration from the command line, installs
    signal handlers for clean shutdown, then starts this process in its
    cluster role: training worker, test worker, or parameter server.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-gpu',
                        '--gpu',
                        default=0,
                        type=int,
                        help='Number of GPUs')
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    parser.add_argument('--task', default=0, type=int, help='Task index')
    parser.add_argument('--job-name', default="worker", help='worker or ps')
    parser.add_argument('--num-workers',
                        default=1,
                        type=int,
                        help='Number of workers')
    parser.add_argument('--num-ps',
                        type=int,
                        default=1,
                        help="Number of parameter servers")
    parser.add_argument('--log', default="/tmp/vpn", help='Log directory path')
    parser.add_argument('--env-id', default="maze", help='Environment id')
    parser.add_argument(
        '-r',
        '--remotes',
        default=None,
        help='References to environments to create (e.g. -r 20), '
        'or the address of pre-existing VNC servers and '
        'rewarders to use (e.g. -r vnc://localhost:5900+15900,vnc://localhost:5901+15901)'
    )
    parser.add_argument('-a',
                        '--alg',
                        choices=['A3C', 'Q', 'VPN'],
                        default="A3C")
    parser.add_argument('-mo',
                        '--model',
                        type=str,
                        default="LSTM",
                        help="Name of model: [CNN | LSTM]")
    parser.add_argument('--eval-freq',
                        type=int,
                        default=250000,
                        help="Evaluation frequency")
    # NOTE(review): help text below duplicates --eval-freq's — looks like a
    # copy-paste slip; confirm the intended meaning (likely "number of
    # evaluation episodes") before changing the user-visible string.
    parser.add_argument('--eval-num',
                        type=int,
                        default=500,
                        help="Evaluation frequency")
    parser.add_argument('--eval-epoch',
                        type=int,
                        default=0,
                        help="Evaluation epoch")
    parser.add_argument('--seed', type=int, default=0, help="Random seed")
    parser.add_argument('--config',
                        type=str,
                        default="config/collect_deterministic.xml",
                        help="config xml file for environment")

    # Hyperparameters
    parser.add_argument('-n',
                        '--t-max',
                        type=int,
                        default=10,
                        help="Number of unrolling steps")
    parser.add_argument('-g',
                        '--gamma',
                        type=float,
                        default=0.98,
                        help="Discount factor")
    parser.add_argument('-ld',
                        '--ld',
                        type=float,
                        default=1,
                        help="Lambda for GAE")
    parser.add_argument('-lr',
                        '--lr',
                        type=float,
                        default=1e-4,
                        help="Learning rate")
    parser.add_argument('--decay',
                        type=float,
                        default=0.95,
                        help="Learning decay")
    parser.add_argument('-ms',
                        '--max-step',
                        type=int,
                        default=int(15e6),
                        help="Max global step")
    parser.add_argument('--dim',
                        type=int,
                        default=0,
                        help="Number of final hidden units")
    parser.add_argument('--f-num',
                        type=str,
                        default='32,32,64',
                        help="num of conv filters")
    parser.add_argument('--f-pad',
                        type=str,
                        default='SAME',
                        help="padding of conv filters")
    parser.add_argument('--f-stride',
                        type=str,
                        default='1,1,2',
                        help="stride of conv filters")
    parser.add_argument('--f-size',
                        type=str,
                        default='3,3,4',
                        help="size of conv filters")
    parser.add_argument('--h-dim',
                        type=str,
                        default='',
                        help="num of hidden units")

    # Q-Learning parameters
    parser.add_argument('-s',
                        '--sync',
                        type=int,
                        default=10000,
                        help="Target network synchronization frequency")
    parser.add_argument('-f',
                        '--update-freq',
                        type=int,
                        default=1,
                        help="Parameter update frequency")
    parser.add_argument('--eps-step',
                        type=int,
                        default=int(1e6),
                        help="Num of local steps for epsilon scheduling")
    parser.add_argument('--eps',
                        type=float,
                        default=0.05,
                        help="Final epsilon value")
    parser.add_argument('--eps-eval',
                        type=float,
                        default=0.0,
                        help="Epsilon for evaluation")

    # VPN parameters
    parser.add_argument('--prediction-step',
                        type=int,
                        default=3,
                        help="number of prediction steps")
    parser.add_argument('--branch',
                        type=str,
                        default="4,4,4",
                        help="branching factor")
    parser.add_argument('--buf',
                        type=int,
                        default=10**6,
                        help="num of steps for random buffer")

    args = parser.parse_args()
    # Comma-separated CLI strings -> numeric lists.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.h_dim = util.parse_to_num(args.h_dim)
    # Evaluation epsilon is never allowed to exceed the training epsilon.
    args.eps_eval = min(args.eps, args.eps_eval)
    args.branch = util.parse_to_num(args.branch)
    spec = cluster_spec(args.num_workers, args.num_ps)
    cluster = tf.train.ClusterSpec(spec).as_cluster_def()

    # Parameter named 'signum' (not 'signal') so the handler does not shadow
    # the signal module.
    def shutdown(signum, frame):
        logger.warn('Received signal %s: exiting', signum)
        # Conventional exit code for "killed by signal N".
        sys.exit(128 + signum)

    signal.signal(signal.SIGHUP, shutdown)
    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    # Workers cap per-process GPU memory so several can share one card.
    # (The original dead `gpu_options = None` assignment was removed.)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.15)

    if args.job_name in ("worker", "test"):
        # Both roles join the cluster as "worker" tasks; single-threaded op
        # execution keeps each process from oversubscribing the machine.
        server = tf.train.Server(cluster,
                                 job_name="worker",
                                 task_index=args.task,
                                 config=tf.ConfigProto(
                                     intra_op_parallelism_threads=1,
                                     inter_op_parallelism_threads=1,
                                     gpu_options=gpu_options))
        if args.job_name == "worker":
            run(args, server)
        else:
            run_tester(args, server)
    elif args.job_name == "ps":
        # Parameter servers need almost no GPU memory.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.05)
        server = tf.train.Server(cluster,
                                 job_name="ps",
                                 task_index=args.task,
                                 config=tf.ConfigProto(
                                     device_filters=["/job:ps"],
                                     gpu_options=gpu_options))
        # A ps process serves until it is killed by a signal.
        while True:
            time.sleep(1000)
# NOTE(review): top-level script fragment; the with-block at the bottom is
# truncated in this chunk, so only comments are added here.
parser.add_argument('--f-size', type=str, default='3,3,4', help="size of conv filters")
parser.add_argument('--f-pad', type=str, default='SAME', help="padding of conv filters")

# VPN parameters
parser.add_argument('--branch', type=str, default="4,4,4", help="branching factor")


# Build the evaluation environment from an XML config file.
config = open('config/evaluation_config.xml').read()
config = BeautifulSoup(config, "lxml")
env = MazeSMDP(gamma=0.99, config=config)
# env.visualize().show()
eps = 0.

args = parser.parse_args()
args.task = 0
# Comma-separated CLI strings -> numeric lists.
args.f_num = util.parse_to_num(args.f_num)
args.f_stride = util.parse_to_num(args.f_stride)
args.f_size = util.parse_to_num(args.f_size)
args.branch = util.parse_to_num(args.branch)

args.meta_dim = 0 if env.meta() is None else len(env.meta())
device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
# Cap per-process GPU memory so several evaluators can share one card.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
# NOTE: 'config' is rebound here — the BeautifulSoup document above was
# already consumed by MazeSMDP.
config = tf.ConfigProto(device_filters=device,
                        gpu_options=gpu_options,
                        allow_soft_placement=True)
with tf.Session(config=config) as sess:
    # Map algorithm name to the model-type string expected by the agent.
    if args.alg == 'A3C':
        model_type = 'policy'
    elif args.alg == 'Q':
        model_type = 'q'
# Example 6
def run(envs=None):
    """Evaluate several saved checkpoints under several rollout branch shapes.

    For every (checkpoint, branch_type) pair, a fresh default graph and
    session are built, the agent is restored from the checkpoint, and it is
    evaluated on each environment in ``envs``. Per-run statistics from
    ``evaluate`` are accumulated into one flat list.

    Args:
        envs: optional list of environments; falls back to the single
            environment constructed from the parsed CLI arguments.

    Returns:
        Flat list of per-run evaluation statistics.
    """
    stats = []

    args = parser.parse_args()
    args.task = 0
    # Comma-separated CLI strings -> numeric lists.
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)

    env = new_env(args)
    if envs is None or not envs:
        envs = [env]

    args.meta_dim = 0 if env.meta() is None else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    # Cap per-process GPU memory so several evaluators can share one card.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    # Rollout branching factors to sweep; each inner list is one plan depth.
    # NOTE(review): the last two entries are identical duplicates — confirm
    # whether one of them was meant to differ.
    branches = [
        [4, 4, 4],
        [4, 4, 4, 4],
        [4, 4, 4, 4, 4],
        [4, 1, 4, 1, 4],
        [1],
        [1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [4, 4, 4, 1],
        [4, 4, 4, 1, 1],
        [4, 4, 4, 1, 1, 1],
        [1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
    ]
    # HACK: checkpoint locations are hard-coded to one user's home directory;
    # make this configurable before reuse.
    paths = [
        f'/home/ikaynov/Repositories/value-prediction-network/Experiments/{x}/best'
        for x in [
            's10_t20_g8_444',
            's10_t20_g8_4444',
            's10_t20_g8_44444',
        ]
    ]
    count = 0
    count_max = len(paths) * len(branches)
    for ck in paths:
        for branch_type in branches:
            print(f'Executing {count}/{count_max}.')
            config = tf.ConfigProto(device_filters=device,
                                    gpu_options=gpu_options,
                                    allow_soft_placement=True)
            # Fresh graph per pair so restored variables don't collide.
            tf.reset_default_graph()
            with tf.Session(config=config) as sess:
                # NOTE(review): runs before any variables are added to the
                # (just-reset) graph, so this initializes nothing; restore()
                # below supplies all variable values — confirm it can go.
                sess.run(tf.global_variables_initializer())
                if args.alg == 'A3C':
                    model_type = 'policy'
                elif args.alg == 'Q':
                    model_type = 'q'
                elif args.alg == 'VPN':
                    model_type = 'vpn'
                else:
                    raise ValueError('Invalid algorithm: ' + args.alg)
                with tf.device(device):

                    # np.random.seed(args.seed)

                    with tf.variable_scope("local/learner"):
                        agent = eval("model." + args.model)(
                            env.observation_space.shape,
                            env.action_space.n,
                            type=model_type,
                            gamma=args.gamma,
                            dim=args.dim,
                            f_num=args.f_num,
                            f_stride=args.f_stride,
                            f_size=args.f_size,
                            f_pad=args.f_pad,
                            branch=branch_type,
                            meta_dim=args.meta_dim)
                        # Recover the branch shape the checkpoint was trained
                        # with from its directory name (e.g. '..._444' -> [4,4,4]).
                        agent.train_branch = str([
                            int(x)
                            for x in list(ck.split('/')[-2].split('_')[-1])
                        ])
                        print("Num parameters: %d" % agent.num_param)
                    saver = tf.train.Saver()
                    saver.restore(sess, ck)

                    # NOTE(review): this loop rebinds the outer 'env', so
                    # agent construction in later iterations uses the last
                    # element of envs — confirm that is intended.
                    for i, env in enumerate(envs):
                        run_stats = evaluate(env,
                                             agent,
                                             args.n_play,
                                             eps=args.eps)
                        stats += run_stats
            count += 1
    return stats