import time

import numpy as np

# Protocol types ship with the AIDO protocol package; exact import paths may
# differ between template versions. Model, DTPytorchWrapper, Controller,
# SteeringToWheelVelWrapper and jpg2rgb come from the submission's local modules.
from aido_schemas import (Context, Duckiebot1Commands, Duckiebot1Observations,
                          EpisodeStart, JPGImage, LEDSCommands, PWMCommands,
                          RGB, logger)


class PytorchAgent:
    def __init__(self, load_model=False, model_path=None):
        logger.info('PytorchAgent init')
        self.preprocessor = DTPytorchWrapper()
        self.model = Model()
        self.current_image = np.zeros((640, 480, 3))
        self.steering_to_wheel_wrapper = SteeringToWheelVelWrapper()
        self.controller = Controller()
        self.dt = None
        self.last_t = None
        self.old_obs = None
        logger.info('PytorchAgent init complete')

    def init(self, context: Context):
        context.info('init()')

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: Duckiebot1Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        # self.current_image = self.preprocessor.preprocess(obs)
        self.current_image = obs

    def compute_action(self, observation):
        # Predict the relative pose (lateral offset, heading) from the image.
        pose = self.model.predict(observation).detach().cpu().numpy()[0]
        pose[1] *= 3.1415  # scale the normalized heading prediction back to radians

        # Track the elapsed time between calls so the controller gets a real dt.
        time_now = time.time()
        if self.last_t is not None:
            self.dt = time_now - self.last_t
        v, omega = self.controller.compute_control_action(pose[0], pose[1], dt=self.dt)

        # Convert [v, omega] into left/right wheel commands.
        action = self.steering_to_wheel_wrapper.convert(np.array([v, omega]))
        self.last_t = time_now
        self.old_obs = observation
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)
        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))
        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = Duckiebot1Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        context.info('finish()')
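# `jpg2rgb` is used in on_received_observations above but not defined in this
# file. A minimal sketch of what such a helper needs to do, assuming Pillow and
# numpy are available (the helper shipped with the template may differ):
import io

import numpy as np
from PIL import Image


def jpg2rgb(image_data: bytes) -> np.ndarray:
    """Decode JPEG bytes into an HxWx3 uint8 RGB array."""
    im = Image.open(io.BytesIO(image_data)).convert('RGB')
    return np.asarray(im, dtype=np.uint8)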
import gym

# The wrappers live in the local wrappers module (see solve() below);
# SteeringToWheelVelWrapper is assumed to be defined there as well.
from wrappers import (ActionWrapper, ImgWrapper, NormalizeWrapper,
                      ResizeWrapper, SteeringToWheelVelWrapper)


def launch_env(id=None):
    env = None
    if id is None:
        from gym_duckietown.simulator import Simulator
        env = Simulator(
            seed=123,  # random seed
            map_name="loop_empty",
            max_steps=500001,  # we don't want the gym to reset itself
            domain_rand=False,
            camera_width=640,
            camera_height=480,
            accept_start_angle_deg=4,  # start close to straight
            full_transparency=True,
            distortion=True,
        )
    else:
        env = gym.make(id)

    # Wrappers
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
    env = SteeringToWheelVelWrapper(env)
    env = ActionWrapper(env)
    # env = DtRewardWrapper(env)

    return env
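# A quick smoke test of the helper above. This is a sketch: it assumes the
# wrapped action space still accepts a 2-vector [speed, steering angle], which
# depends on what ActionWrapper does in the local wrappers module.
if __name__ == '__main__':
    import numpy as np

    env = launch_env()
    obs = env.reset()
    obs, reward, done, info = env.step(np.array([0.2, 0.0]))  # creep straight ahead
    print(obs.shape, reward, done)
    env.close()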
import traceback

import gym
from duckietown_challenges import ChallengeInterfaceSolution


def solve(params, cis):
    # python has dynamic typing; the line below can help IDEs with autocompletion
    assert isinstance(cis, ChallengeInterfaceSolution)
    # after this, cis. will provide you with some autocompletion in some IDEs (e.g. pycharm)
    cis.info('Creating model.')
    # you have logging capabilities through the solution interface (cis).
    # the info you log can be retrieved from your submission files.

    # We get the environment from the Evaluation Engine
    cis.info('Making environment')
    env = gym.make(params['env'])

    # === BEGIN SUBMISSION ===
    # If you created custom wrappers, you also need to copy them into this folder.
    from wrappers import NormalizeWrapper, ImgWrapper, ActionWrapper, ResizeWrapper

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    # to make the images pytorch-conv-compatible
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    # you ONLY need this wrapper if you trained your policy on [speed, steering angle]
    # instead of [left wheel speed, right wheel speed]
    env = SteeringToWheelVelWrapper(env)

    # You have to make sure that you wrap at least the actions and observations
    # the same way as during training, so that your model receives the same kind
    # of input it was trained on (for example, if your model was trained on
    # grayscale images and here you _don't_ make them grayscale too, your model
    # won't work).

    # HERE YOU NEED TO CREATE THE POLICY NETWORK, SAME AS IN THE TRAINING CODE:
    # if you aren't using the DDPG baseline code, make sure to copy your model
    # into the model.py file and that it has a model.predict(state) method.
    from model import DDPG

    model = DDPG(state_dim=env.observation_space.shape, action_dim=2, max_action=1, net_type="cnn")
    try:
        model.load("model", "models")
        # === END SUBMISSION ===

        # Then we make sure we have a connection with the environment and it is ready to go
        cis.info('Reset environment')
        observation = env.reset()

        # While there is no signal of completion (simulation done), we run
        # predictions for a number of episodes; don't worry, we have control
        # over this part.
        while True:
            # we pass the observation to our model, and we get an action in return
            action = model.predict(observation)
            # we tell the environment to perform this action and we get some info back in OpenAI Gym style
            observation, reward, done, info = env.step(action)
            # here you may want to compute some stats, like how much reward you are getting;
            # note that this reward may not be associated with the challenge score.

            # it is important to check for this flag: the Evaluation Engine will let us
            # know when we should finish. if we are not careful with this, the Evaluation
            # Engine will kill our container and we will get no score from this submission.
            if 'simulation_done' in info:
                cis.info('simulation_done received.')
                break
            if done:
                cis.info('Episode done; calling reset()')
                env.reset()
    finally:
        # release CPU/GPU resources; let's be friendly with other users that may need them
        cis.info('Releasing resources')
        try:
            model.close()
        except Exception:
            msg = 'Could not call model.close():\n%s' % traceback.format_exc()
            cis.error(msg)
    cis.info('Graceful exit of solve()')
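# The loop in solve() only relies on a model.predict(observation) -> action
# contract, plus an optional close(). A minimal sketch of that interface
# (RandomPolicy is a hypothetical stand-in for illustration; the DDPG baseline
# implements the real thing):
import numpy as np


class RandomPolicy:
    """Stand-in policy exposing the same predict()/close() contract as the model."""

    def predict(self, observation: np.ndarray) -> np.ndarray:
        # [speed, steering angle] in [-1, 1]; SteeringToWheelVelWrapper
        # translates this into left/right wheel velocities.
        return np.random.uniform(-1, 1, size=2)

    def close(self):
        pass  # release any resources (GPU memory, file handles, ...)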
    # Set seeds
    seed(args.seed)

    # Launch the env with our helper function
    env = launch_env(seed=111, map_name=args.map_name)

    # Wrappers
    env = wrappers.Monitor(env, './videos/test/' + file_name + '/', force=True,
                           video_callable=lambda x: True)
    env = CropWrapper(env)
    env = ResizeWrapper(env)
    # env = FrameStack(env, args.frame_stack_k)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
    env = SteeringToWheelVelWrapper(env)
    # env = SoftActionWrapper(env)
    # env = ActionWrapper(env)
    # env = DtRewardWrapper(env)  # not during testing

    state_dim = env.observation_space.shape
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Initialize policy
    policy = TD3(state_dim, action_dim, max_action, net_type=args.net_type, args=args)
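    # The evaluation loop itself is cut off above; a minimal rollout sketch,
    # assuming the TD3 policy exposes predict(state) like the DDPG model in solve():
    obs = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        action = policy.predict(np.array(obs))
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    print('episode reward:', episode_reward)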
os.makedirs("./results") if args.save_models and not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Launch the env with our helper function env = launch_env(map_name=args.map_name) time_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time())) # Wrappers env = wrappers.Monitor(env, './videos/train/' + time_str + '/', force=True) #env = ResizeWrapper(env) #env = FrameStack(env, k = args.frame_stack_k) #env = NormalizeWrapper(env) #env = ImgWrapper(env) # to make the images from 160x120x3 into 3x160x120 env = SteeringToWheelVelWrapper(env) #env = StableRewardWrapper(env) #env = ActionWrapper(env) #env = SoftActionWrapper(env) env = DtRewardWrapper(env) # Set seeds seed(args.seed) state_dim = env.observation_space.shape action_dim = env.action_space.shape[0] max_action = float(env.action_space.high[0]) # Initialize policy policy = TD3(state_dim, action_dim,