Example #1
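    # build the RDDPG agent with hyperparameters taken from the parsed command-line arguments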
    agent = RDDPGAgent(state_size=state_size,
                       action_size=action_size,
                       actor_lr=args.actor_lr,
                       critic_lr=args.critic_lr,
                       tau=args.tau,
                       gamma=args.gamma,
                       lambd=args.lambd,
                       batch_size=args.batch_size,
                       memory_size=args.memory_size,
                       epsilon=args.epsilon,
                       epsilon_end=args.epsilon_end,
                       decay_step=args.decay_step,
                       load_model=args.load_model)

    episode = 0
    env = Env()

    if args.play:
        while True:
            try:
                done = False
                bug = False

                # stats
                bestY, timestep, score, avgvel, avgQ = 0., 0, 0., 0., 0.

                observe = env.reset()
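                # the observation is an (image, velocity) pair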
                image, vel = observe
                try:
                    image = transform_input(image, args.img_height,
                                            args.img_width)
Example #2
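# training constants: target Y position, discount factor, lambda, and per-episode step cap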
targetY = 58
gamma = 0.99
lamb = 0.90
max_step = 600

score_bank = []
episode = 0

if __name__ == "__main__":
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Make RL agent
    model = PPO_Network(SEQUENCE_SIZE, W, H, ACTION_SIZE)
    functions = PPO_Functions()
    env = Env()
    # set the TensorFlow session and initialize
    model.set_session(tf.Session(config=config))
    try:
        for e in range(5000000):
            done = False
            bug = False
            level = -1
            reward_sum = 0
            t = 0
            score = 0
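            # reset the environment; the observation holds image responses and the quad's velocity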
            observation = env.reset()
            responses = observation[0]
            quad_vel = observation[1]
            # stack history here
            try:
Example #3
    agent3 = RDQNAgent(
        state_size=state_size,
        action_size=action_size,
        lr=args.lr,
        gamma=args.gamma,
        batch_size=args.batch_size,
        memory_size=args.memory_size,
        epsilon=args.epsilon,
        epsilon_end=args.epsilon_end,
        decay_step=args.decay_step,
        load_model=args.load_model,
        agent_name=agent_name3
    )

    episode = 0
    env = Env()

    if args.play:
        while True:
            try:
                done = False
                bug = False

                # stats
                bestReward, timestep, score, avgQ = 0., 0, 0., 0.

                observe = env.reset()
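                # unpack the observation: a list of images plus the quad's velocity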
                image, vel = observe
                vel = np.array(vel)
                try:
                    image1 = transform_input(image[0], args.img_height, args.img_width)
Example #4
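        # final branch: an offset along the z axis (this snippet is likely the tail of interpret_action)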
        quad_offset = (0, 0, -scaling_factor)

    return quad_offset


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--continuous', action='store_true')
    args = parser.parse_args()

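    # choose an agent that matches the action space: 3-dimensional continuous or 7 discrete actions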
    if args.continuous:
        agent = RandomAgentContinuous(3)
    else:
        agent = RandomAgentDiscrete(7)
    env = Env()

    episode = 0
    while True:
        done = False
        timestep = 0
        score = 0
        _ = env.reset()

        while not done:
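            # sample a random action, step the environment, and accumulate the reward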
            timestep += 1
            action = agent.get_action()
            if not args.continuous:
                action = interpret_action(action)
            _, reward, done, info = env.step(action)
            score += reward