Exemplo n.º 1
0
        print("load ... %d" % start_from)
        for model in models:
            model.load(savedir, start_from)
    else:
        start_from = 0

    # print debug info
    print(args)
    print("view_space", env.get_view_space(handles[0]))
    print("feature_space", env.get_feature_space(handles[0]))

    # play
    start = time.time()
    for k in range(start_from, start_from + args.n_round):
        tic = time.time()
        eps = buffer.piecewise_decay(k, [0, 700, 1400],
                                     [1, 0.2, 0.05]) if not args.greedy else 0
        loss, num, reward, value = play_a_round(
            env,
            args.map_size,
            handles,
            models,
            train=args.train,
            print_every=50,
            render=args.render or (k + 1) % args.render_every == 0,
            eps=eps)  # for e-greedy

        log.info("round %d\t loss: %s\t num: %s\t reward: %s\t value: %s" %
                 (k, loss, num, reward, value))
        print("round time %.2f  total time %.2f\n" %
              (time.time() - tic, time.time() - start))
Exemplo n.º 2
0
        print("load ... %d" % start_from)
        models[0].load(savedir, start_from)
    else:
        start_from = 0

    # print debug info
    print(args)
    print("view_size", env.get_view_space(handles[0]))
    print("feature_size", env.get_feature_space(handles[0]))

    # play
    # Wall-clock reference for the whole run. It is taken once before the loop
    # and must not be reassigned inside it, otherwise elapsed-time reporting
    # based on `start` would be computed from a non-timestamp value.
    start = time.time()
    for k in range(start_from, start_from + args.n_round):
        tic = time.time()  # per-round timer
        # First value of the epsilon schedule. It has its own name so that it
        # does not clobber the `start` timestamp above.
        start_eps = 1 if args.opponent != -1 else 0.1
        # Epsilon for round k comes from the piecewise schedule, or is 0 when
        # args.greedy is set.
        train_eps = buffer.piecewise_decay(
            k, [0, 100, 250], [start_eps, 0.1, 0.05]) if not args.greedy else 0
        # Opponent epsilon tracks train_eps for k < 100, then is fixed at 0.05.
        opponent_eps = train_eps if k < 100 else 0.05  # can use curriculum learning in first 100 steps

        loss, num, reward, value = play_a_round(
            env,
            args.map_size,
            handles,
            models,
            eps=[opponent_eps, train_eps],  # for e-greedy
            step_batch_size=step_batch_size,
            train=args.train,
            print_every=50,
            # Render when requested, or on every args.render_every-th round.
            render=args.render
            or (k + 1) % args.render_every == 0)

        log.info("round %d\t loss: %s\t num: %s\t reward: %s\t value: %s" %
Exemplo n.º 3
0
        print("load ... %d" % start_from)
        for model in models:
            model.load(savedir, start_from)
    else:
        start_from = 0

    # print state info
    print(args)
    print("view_space", env.get_view_space(handles[0]))
    print("feature_space", env.get_feature_space(handles[0]))

    # play
    start = time.time()
    for k in range(start_from, start_from + args.n_round):
        tic = time.time()
        eps = buffer.piecewise_decay(k, [0, 600, 1200],
                                     [1, 0.2, 0.1]) if not args.greedy else 0
        loss, num, reward, value = play_a_round(
            env,
            args.map_size,
            handles,
            models,
            train=args.train,
            print_every=50,
            render=args.render or (k + 1) % args.render_every == 0,
            eps=eps)  # for e-greedy

        log.info("round %d\t loss: %s\t num: %s\t reward: %s\t value: %s" %
                 (k, loss, num, reward, value))
        print("round time %.2f  total time %.2f\n" %
              (time.time() - tic, time.time() - start))
Exemplo n.º 4
0
                             args.map_size,
                             food_handle,
                             player_handles,
                             models,
                             -1,
                             record=True,
                             render=False,
                             print_every=args.print_every,
                             eps=eps)
    else:
        # play
        start = time.time()
        train_id = 0 if args.train else -1
        for k in range(start_from, start_from + args.n_round):
            tic = time.time()
            eps = buffer.piecewise_decay(
                k, [0, 400, 1200], [1.0, 0.2, 0.10]) if not args.greedy else 0
            loss, reward, value, pos_reward_ct, fill_rate = \
                play_a_round(env, args.map_size, food_handle, player_handles, models,
                             train_id, record=False,
                             render=args.render or (k+1) % args.render_every == 0,
                             print_every=args.print_every, eps=eps)
            log.info(
                "round %d\t loss: %.3f\t reward: %.2f\t value: %.3f\t pos_reward_ct: %d\t fill: %.2f"
                % (k, loss, reward, value, pos_reward_ct, fill_rate))
            print("round time %.2f  total time %.2f\n" %
                  (time.time() - tic, time.time() - start))

            if (k + 1) % args.save_every == 0 and args.train:
                print("save models...")
                for model in models:
                    model.save(save_dir, k)