agent = RDDPGAgent(
    state_size=state_size,
    action_size=action_size,
    actor_lr=args.actor_lr,
    critic_lr=args.critic_lr,
    tau=args.tau,
    gamma=args.gamma,
    lambd=args.lambd,
    batch_size=args.batch_size,
    memory_size=args.memory_size,
    epsilon=args.epsilon,
    epsilon_end=args.epsilon_end,
    decay_step=args.decay_step,
    load_model=args.load_model
)
episode = 0
env = Env()

if args.play:
    while True:
        try:
            done = False
            bug = False

            # stats
            bestY, timestep, score, avgvel, avgQ = 0., 0, 0., 0., 0.

            observe = env.reset()
            image, vel = observe
            try:
                image = transform_input(image, args.img_height, args.img_width)
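# --- Illustrative sketch only: transform_input is used above but not defined in
# --- this snippet. This is a hypothetical version, assuming the observation is a
# --- 2-D depth/vision image from AirSim that must be resized, normalized, and
# --- reshaped to the (1, H, W, 1) tensor the recurrent actor expects.
import numpy as np
import cv2

def transform_input_sketch(image, img_height, img_width):
    # Resize to the network's input resolution (cv2 expects (width, height)).
    image = cv2.resize(np.asarray(image, dtype=np.float32), (img_width, img_height))
    # Normalize pixel values to [0, 1].
    image = image / 255.0
    # Add batch and channel axes: (1, H, W, 1).
    return image.reshape(1, img_height, img_width, 1)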
targetY = 58
gamma = 0.99
lamb = 0.90
max_step = 600
score_bank = []
episode = 0

if __name__ == "__main__":
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Make RL agent
    model = PPO_Network(SEQUENCE_SIZE, W, H, ACTION_SIZE)
    functions = PPO_Functions()
    env = Env()

    # set session, and initialize
    model.set_session(tf.Session(config=config))

    try:
        for e in range(5000000):
            done = False
            bug = False
            level = -1
            reward_sum = 0
            t = 0
            score = 0

            observation = env.reset()
            responses = observation[0]
            quad_vel = observation[1]

            # stack history here
            try:
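# --- Illustrative sketch only (not part of PPO_Functions): how the gamma and
# --- lamb constants above are typically combined in PPO, via generalized
# --- advantage estimation (GAE) over one finished trajectory. `rewards`,
# --- `values`, and `dones` are assumed to be equal-length per-step lists.
import numpy as np

def compute_gae_sketch(rewards, values, dones, last_value, gamma=0.99, lamb=0.90):
    advantages = np.zeros(len(rewards), dtype=np.float32)
    gae = 0.0
    next_value = last_value
    for t in reversed(range(len(rewards))):
        # TD residual; the bootstrap term is cut off when the episode terminates.
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        gae = delta + gamma * lamb * (1 - dones[t]) * gae
        advantages[t] = gae
        next_value = values[t]
    returns = advantages + np.asarray(values, dtype=np.float32)
    return advantages, returns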
agent3 = RDQNAgent(
    state_size=state_size,
    action_size=action_size,
    lr=args.lr,
    gamma=args.gamma,
    batch_size=args.batch_size,
    memory_size=args.memory_size,
    epsilon=args.epsilon,
    epsilon_end=args.epsilon_end,
    decay_step=args.decay_step,
    load_model=args.load_model,
    agent_name=agent_name3
)
episode = 0
env = Env()

if args.play:
    while True:
        try:
            done = False
            bug = False

            # stats
            bestReward, timestep, score, avgQ = 0., 0, 0., 0.

            observe = env.reset()
            image, vel = observe
            vel = np.array(vel)
            try:
                image1 = transform_input(image[0], args.img_height, args.img_width)
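# --- Hypothetical illustration only (not RDQNAgent's actual internals): how the
# --- epsilon / epsilon_end / decay_step arguments passed above are commonly
# --- used, i.e. a linear epsilon-greedy schedule annealed over decay_step steps.
import numpy as np

def epsilon_at(step, epsilon_start=1.0, epsilon_end=0.05, decay_step=100000):
    # Linearly anneal from epsilon_start to epsilon_end, then hold.
    fraction = min(step / float(decay_step), 1.0)
    return epsilon_start + fraction * (epsilon_end - epsilon_start)

def epsilon_greedy(q_values, epsilon, rng=np.random):
    # Explore with probability epsilon, otherwise act greedily on the Q-values.
    if rng.random() < epsilon:
        return rng.randint(len(q_values))
    return int(np.argmax(q_values))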
        quad_offset = (0, 0, -scaling_factor)
    return quad_offset


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--continuous', action='store_true')
    args = parser.parse_args()

    if args.continuous:
        agent = RandomAgentContinuous(3)
    else:
        agent = RandomAgentDiscrete(7)
    env = Env()

    episode = 0
    while True:
        done = False
        timestep = 0
        score = 0
        _ = env.reset()

        while not done:
            timestep += 1
            action = agent.get_action()
            if not args.continuous:
                action = interpret_action(action)
            _, reward, done, info = env.step(action)
            score += reward
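# --- Hypothetical reconstruction, for illustration only: the snippet above shows
# --- just the tail of interpret_action. A plausible full mapping from the 7
# --- discrete actions of RandomAgentDiscrete to velocity offsets, in the style
# --- of the common AirSim discrete-control examples, might look like this.
def interpret_action_sketch(action, scaling_factor=0.25):
    # Action 0 is a no-op; actions 1-6 nudge the quad along +/- x, y, z.
    if action == 0:
        quad_offset = (0, 0, 0)
    elif action == 1:
        quad_offset = (scaling_factor, 0, 0)
    elif action == 2:
        quad_offset = (0, scaling_factor, 0)
    elif action == 3:
        quad_offset = (0, 0, scaling_factor)
    elif action == 4:
        quad_offset = (-scaling_factor, 0, 0)
    elif action == 5:
        quad_offset = (0, -scaling_factor, 0)
    else:
        quad_offset = (0, 0, -scaling_factor)
    return quad_offset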