Example #1

import argparse
import logging

import gym
import jiminy.wrappers.experimental  # imports inferred from the calls below
import wob_vnc  # helper module from the surrounding project

logger = logging.getLogger(__name__)
REMOTES_COUNT = 8  # assumed value; the constant is defined elsewhere in the original module


def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        dest='verbosity',
                        default=0,
                        help='Set verbosity.')
    args = parser.parse_args()

    if args.verbosity == 0:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 1:
        logger.setLevel(logging.DEBUG)

    env = gym.make('wob.mini.ClickDialog-v0')
    env = jiminy.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env,
                      wob_vnc.remotes_url(port_ofs=0,
                                          hostname='0.0.0.0',
                                          count=REMOTES_COUNT)
                      )  # automatically creates a local docker container

    observation_n = env.reset()
    idx = 0
    while True:
        # your agent here
        #
        # The unwrapped VNC env also accepts events such as
        # ('KeyEvent', 'ArrowUp', True); after SoftmaxClickMouse, though,
        # the action space is discrete click regions, so sample() is used.
        action_n = [env.action_space.sample() for ob in observation_n]
        observation_n, reward_n, done_n, info = env.step(action_n)
        print("idx: {}, reward: {}".format(idx * REMOTES_COUNT, reward_n))
        idx += 1
    return 0
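
wob_vnc.remotes_url is a helper from the surrounding project and is not shown
in any of these fragments. Judging from how wob_vnc.configure consumes its
result, it builds a vnc:// connection string covering count containers. A
hypothetical sketch, assuming Universe's usual port layout of 5900 (VNC) plus
15900 (rewarder) per container:

def remotes_url(port_ofs=0, hostname='localhost', count=8):
    # one "host:vnc_port+rewarder_port" entry per container (assumed layout)
    hosts = ["%s:%d+%d" % (hostname, 5900 + ofs, 15900 + ofs)
             for ofs in range(port_ofs, port_ofs + count)]
    return "vnc://" + ",".join(hosts)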
    parser.add_argument("--verbose",
                        default=False,
                        action='store_true',
                        help="Display every step")
    args = parser.parse_args()

    env_name = args.env
    if not env_name.startswith('wob.mini.'):
        env_name = "wob.mini." + env_name

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    if args.save is not None:
        env = wob_vnc.MiniWoBPeeker(env, args.save)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env, REMOTE_ADDR, fps=5)

    net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE,
                          n_actions=env.action_space.n)
    if args.model:
        net.load_state_dict(torch.load(args.model))

    env.reset()
    steps_count = 0
    reward_sum = 0

    for round_idx in range(args.count):
        action = env.action_space.sample()
        step_idx = 0
        while True:
            obs, reward, done, info, idle_count = step_env(env, action)
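
step_env is not defined in any of these fragments, yet every play snippet
unpacks (obs, reward, done, info, idle_count) from it. A minimal sketch of such
a helper, assuming it repeats the action while the vectorized env returns None
observations (Universe's "frame not ready yet" signal):

def step_env(env, action):
    idle_count = 0
    while True:
        obs_n, reward_n, is_done_n, info = env.step([action])
        if obs_n[0] is None:  # frame not ready, count the idle step and retry
            idle_count += 1
            continue
        break
    return obs_n[0], reward_n[0], is_done_n[0], info, idle_count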
Example #3

    writer = SummaryWriter(comment="-wob_click_" + name)
    saves_path = os.path.join(SAVES_DIR, name)
    os.makedirs(saves_path, exist_ok=True)

    demo_samples = None
    if args.demo:
        demo_samples = vnc_demo.load_demo(args.demo, env_name)
        if not demo_samples:
            demo_samples = None
        else:
            print("Loaded %d demo samples, will use them during training" % len(demo_samples))

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env, wob_vnc.remotes_url(port_ofs=args.port_ofs, hostname=args.host, count=REMOTES_COUNT))

    net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n)
    if args.cuda:
        net.cuda()
    print(net)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

    agent = ptan.agent.PolicyAgent(lambda x: net(x)[0], cuda=args.cuda,
                                   apply_softmax=True)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        [env], agent, gamma=GAMMA, steps_count=REWARD_STEPS, vectorized=True)

    best_reward = None
    with common.RewardTracker(writer) as tracker:
        with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
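
The training fragment stops right after the two tracker contexts open. With
ptan, the body that follows typically drains exp_source into fixed-size
batches; a generic sketch of that pattern (BATCH_SIZE and the loss computation
are assumptions, not part of the fragment):

            batch = []
            for step_idx, exp in enumerate(exp_source):
                rewards_steps = exp_source.pop_rewards_steps()
                if rewards_steps:
                    rewards, steps = zip(*rewards_steps)
                    tb_tracker.track("episode_steps", sum(steps) / len(steps), step_idx)
                    tracker.reward(sum(rewards) / len(rewards), step_idx)
                batch.append(exp)
                if len(batch) < BATCH_SIZE:
                    continue
                # unpack the batch into tensors, compute the actor-critic loss,
                # optimizer.step(), then start collecting the next batch
                batch.clear()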
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", help="Model file to load")
    parser.add_argument("-n", "--name", required=True, help="Prefix to save screenshots")
    parser.add_argument("--count", type=int, default=1, help="Count of runs to play, default=1")
    parser.add_argument("--env", default=ENV_NAME, help="Environment name to solve, default=" + ENV_NAME)
    args = parser.parse_args()

    env_name = args.env
    if not env_name.startswith('wob.mini.'):
        env_name = "wob.mini." + env_name

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env, keep_text=True)
    wob_vnc.configure(env, REMOTE_ADDR)

    net = model_vnc.ModelMultimodal(input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n)
    if args.model:
        net.load_state_dict(torch.load(args.model))
        preprocessor = model_vnc.MultimodalPreprocessor.load(args.model[:-4] + ".pre")
    else:
        preprocessor = model_vnc.MultimodalPreprocessor()
    env.reset()

    for round_idx in range(args.count):
        action = env.action_space.sample()
        step_idx = 0
        while True:
            obs, reward, done, info, idle_count = step_env(env, action)
            print(step_idx, reward, done, idle_count)

Example #5

    parser.add_argument("--count",
                        type=int,
                        default=1,
                        help="Count of runs to play, default=1")
    parser.add_argument("--env",
                        default=ENV_NAME,
                        help="Environment name to solve, default=" + ENV_NAME)
    args = parser.parse_args()

    env_name = args.env
    if not env_name.startswith('wob.mini.'):
        env_name = "wob.mini." + env_name

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    env = wob_vnc.MiniWoBCropper(env, keep_text=True)
    wob_vnc.configure(env, REMOTE_ADDR)

    net = model_vnc.ModelMultimodal(input_shape=wob_vnc.WOB_SHAPE,
                                    n_actions=env.action_space.n)
    if args.model:
        net.load_state_dict(torch.load(args.model))
        preprocessor = model_vnc.MultimodalPreprocessor.load(args.model[:-4] +
                                                             ".pre")
    else:
        preprocessor = model_vnc.MultimodalPreprocessor()
    env.reset()

    for round_idx in range(args.count):
        action = env.action_space.sample()
        step_idx = 0
        while True:
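
Examples #4 and #5 load a MultimodalPreprocessor next to the net, but both
fragments break off before it is used. Presumably it packs the (image, text)
observations produced by MiniWoBCropper(keep_text=True) into tensors for
ModelMultimodal; a hypothetical usage sketch (everything beyond the names
already shown above is an assumption):

            obs_v = preprocessor([obs])            # image + packed text tensors
            logits_v = net(obs_v)[0]               # policy head, as in net(x)[0]
            probs_v = torch.softmax(logits_v, dim=1)
            action = probs_v.argmax(dim=1).item()  # greedy click action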
    parser.add_argument("--save", help="Enables screenshots and gives an images prefix")
    parser.add_argument("--count", type=int, default=1, help="Count of episodes to play, default=1")
    parser.add_argument("--env", default=ENV_NAME, help="Environment name to solve, default=" + ENV_NAME)
    parser.add_argument("--verbose", default=False, action='store_true', help="Display every step")
    args = parser.parse_args()

    env_name = args.env
    if not env_name.startswith('wob.mini.'):
        env_name = "wob.mini." + env_name

    env = gym.make(env_name)
    env = universe.wrappers.experimental.SoftmaxClickMouse(env)
    if args.save is not None:
        env = wob_vnc.MiniWoBPeeker(env, args.save)
    env = wob_vnc.MiniWoBCropper(env)
    wob_vnc.configure(env, REMOTE_ADDR, fps=5)

    net = model_vnc.Model(input_shape=wob_vnc.WOB_SHAPE, n_actions=env.action_space.n)
    if args.model:
        net.load_state_dict(torch.load(args.model))

    env.reset()
    steps_count = 0
    reward_sum = 0

    for round_idx in range(args.count):
        action = env.action_space.sample()
        step_idx = 0
        while True:
            obs, reward, done, info, idle_count = step_env(env, action)
            if args.verbose:
                print(step_idx, reward, done, idle_count)
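
The fragment ends here. Given the steps_count and reward_sum counters
initialized above, the episode loop plausibly finishes along these lines (a
sketch, not the original code):

            step_idx += 1
            steps_count += 1
            reward_sum += reward
            if done:
                break
    print("Done %d rounds, mean steps %.2f, mean reward %.3f" % (
        args.count, steps_count / args.count, reward_sum / args.count))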