class PytorchRLBaseline:
    """Turn camera observations into PWM wheel commands using a DDPG model.

    Attributes created by ``init``:
      * ``image_processor``  -- ``DTPytorchWrapper`` used to preprocess frames.
      * ``action_processor`` -- ``ActionWrapper(FakeWrap())`` applied to the
        model output.
      * ``model``            -- ``DDPG`` network, loaded from ``./models``.
      * ``current_image``    -- latest preprocessed observation.
    """

    def init(self, context: Context):
        """Create the processors and the model, then load the saved model.

        Raises:
            RuntimeError: from ``check_gpu_available`` when a GPU is required
                but CUDA is not available.
        """
        context.info('init()')

        self.image_processor = DTPytorchWrapper()
        self.action_processor = ActionWrapper(FakeWrap())

        # Deferred import: ``model`` is only loaded once init() runs.
        from model import DDPG

        self.check_gpu_available(context)

        network_kwargs = dict(
            state_dim=self.image_processor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        self.model = DDPG(**network_kwargs)

        # Placeholder frame; replaced by on_received_observations().
        self.current_image = np.zeros((640, 480, 3))
        self.model.load("model", directory="./models")

    def check_gpu_available(self, context: Context):
        """Log CUDA availability and fail if a GPU is required but missing.

        A GPU counts as required when the ``AIDO_REQUIRE_GPU`` environment
        variable is set to any value.

        Raises:
            RuntimeError: CUDA is unavailable and ``AIDO_REQUIRE_GPU`` is set.
        """
        import torch

        available = torch.cuda.is_available()
        req = os.environ.get('AIDO_REQUIRE_GPU')
        context.info(f'torch.cuda.is_available = {available!r} AIDO_REQUIRE_GPU = {req!r}')
        context.info('init()')

        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f'device {i} of {count}; name = {name!r}')
            return

        if req is None:
            # No GPU, but none was asked for: carry on.
            return

        msg = 'I need a GPU; bailing.'
        context.error(msg)
        raise RuntimeError(msg)

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and keep it as the current image."""
        rgb_frame = jpg2rgb(data.camera.jpg_data)
        self.current_image = self.image_processor.preprocess(rgb_frame)

    def compute_action(self, observation):
        """Run the model on ``observation`` and post-process its output.

        The prediction is cast to ``float`` and passed through
        ``action_processor.action``; that result is returned.
        """
        prediction = self.model.predict(observation).astype(float)
        return self.action_processor.action(prediction)

    def on_received_get_commands(self, context: Context):
        """Compute wheel commands from the current image and write them out."""
        raw_left, raw_right = self.compute_action(self.current_image)

        # Keep both motor values inside [-1, 1].
        left = float(np.clip(raw_left, -1, +1))
        right = float(np.clip(raw_right, -1, +1))

        # The same all-zero RGB value is used for each of the five LED arguments.
        zero_rgb = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(zero_rgb, zero_rgb, zero_rgb, zero_rgb, zero_rgb)
        wheels = PWMCommands(motor_left=left, motor_right=right)

        context.write('commands', DB20Commands(wheels, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')
class PytorchRLTemplateAgent:
    """DDPG-based agent that maps camera frames to PWM wheel commands.

    The preprocessor and model are created in the constructor; ``init`` only
    logs.
    """

    def __init__(self, load_model=False, model_path=None):
        """Create the preprocessor and DDPG model, optionally loading a saved one.

        Args:
            load_model: when truthy, call ``model.load`` after construction.
            model_path: name handed to ``model.load``; any falsy value falls
                back to ``"model"``.
        """
        logger.info('PytorchRLTemplateAgent init')

        self.preprocessor = DTPytorchWrapper()
        network_kwargs = dict(
            state_dim=self.preprocessor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        self.model = DDPG(**network_kwargs)

        # Placeholder frame; replaced by on_received_observations().
        self.current_image = np.zeros((640, 480, 3))

        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            self.model.load(model_path or "model", "models", for_inference=True)

        logger.info('PytorchRLTemplateAgent init complete')

    def init(self, context: Context):
        """Log that init was called; all setup already happened in ``__init__``."""
        context.info('init()')

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: Duckiebot1Observations):
        """Decode the camera JPEG, preprocess it and keep it as the current image."""
        rgb_frame = jpg2rgb(data.camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(rgb_frame)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to ``float``."""
        # A shape check that would re-run the preprocessor is left disabled:
        # if observation.shape != self.preprocessor.transposed_shape:
        #     observation = self.preprocessor.preprocess(observation)
        return self.model.predict(observation).astype(float)

    def on_received_get_commands(self, context: Context):
        """Compute wheel commands from the current image and write them out."""
        raw_left, raw_right = self.compute_action(self.current_image)

        # Keep both motor values inside [-1, 1].
        left = float(np.clip(raw_left, -1, +1))
        right = float(np.clip(raw_right, -1, +1))

        # The same all-zero RGB value is used for each of the five LED arguments.
        zero_rgb = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(zero_rgb, zero_rgb, zero_rgb, zero_rgb, zero_rgb)
        wheels = PWMCommands(motor_left=left, motor_right=right)

        context.write('commands', Duckiebot1Commands(wheels, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')
def solve(params, cis):
    """Run a trained DDPG policy against the evaluation environment.

    The environment named by ``params['env']`` is created, wrapped, and then
    stepped with the model's predictions until the environment reports
    ``'simulation_done'`` in its ``info`` dict.

    Args:
        params: mapping that contains the key ``'env'`` (passed to ``gym.make``).
        cis: a ``ChallengeInterfaceSolution``; used here for ``info``/``error``
            logging.
    """
    # Python has dynamic typing; the line below can help IDEs with autocompletion.
    assert isinstance(cis, ChallengeInterfaceSolution)
    # After this, ``cis.`` will offer autocompletion in some IDEs (e.g. PyCharm).
    cis.info('Creating model.')
    # Logging is available through the solution interface (cis); whatever is
    # logged can be retrieved from the submission files.

    # We get the environment from the Evaluation Engine.
    cis.info('Making environment')
    env = gym.make(params['env'])

    # === BEGIN SUBMISSION ===

    # If you created custom wrappers, you also need to copy them into this folder.
    from wrappers import NormalizeWrapper, ImgWrapper, ActionWrapper, ResizeWrapper

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    # Make the images pytorch-conv-compatible.
    env = ImgWrapper(env)
    env = ActionWrapper(env)

    # You ONLY need this wrapper if you trained your policy on
    # [speed, steering angle] instead of [left speed, right speed].
    # NOTE(review): SteeringToWheelVelWrapper is not among the names imported
    # from ``wrappers`` above -- confirm it is imported at module level.
    env = SteeringToWheelVelWrapper(env)

    # Make sure the actions and observations are wrapped the same way as
    # during training so that the model receives the kind of input it was
    # trained on (e.g. a model trained on grayscale images will not work if
    # the images are not converted to grayscale here too).

    # HERE YOU NEED TO CREATE THE POLICY NETWORK SAME AS YOU DID IN THE TRAINING CODE.
    # If you aren't using the DDPG baseline code, copy your model into
    # model.py and make sure it has a model.predict(state) method.
    from model import DDPG

    model = DDPG(state_dim=env.observation_space.shape, action_dim=2, max_action=1, net_type="cnn")

    try:
        model.load("model", "models")

        # === END SUBMISSION ===

        # Make sure we have a connection with the environment and it is ready.
        cis.info('Reset environment')
        observation = env.reset()

        # Keep predicting until the Evaluation Engine signals completion; the
        # engine controls how many episodes are run.
        while True:
            # Pass the observation to the model and get an action in return.
            action = model.predict(observation)
            # Perform the action; results come back in OpenAI Gym style.
            observation, reward, done, info = env.step(action)
            # Stats could be computed here; note that this reward may not be
            # associated with the challenge score.

            # This flag must be checked: if it is ignored, the Evaluation
            # Engine will kill the container and the submission gets no score.
            if 'simulation_done' in info:
                cis.info('simulation_done received.')
                break
            if done:
                cis.info('Episode done; calling reset()')
                # Keep the observation returned by reset(); otherwise the next
                # prediction would be made on the finished episode's last frame.
                observation = env.reset()
    finally:
        # Release CPU/GPU resources for other users that may need them.
        cis.info('Releasing resources')
        try:
            model.close()
        except Exception:
            # Best-effort cleanup: report the failure but let
            # KeyboardInterrupt/SystemExit propagate.
            msg = 'Could not call model.close():\n%s' % traceback.format_exc()
            cis.error(msg)
    cis.info('Graceful exit of solve()')
# Train a DDPG agent on the unwrapped Pendulum-v0 environment.
env = gym.make('Pendulum-v0').unwrapped
n_state = env.observation_space.shape[0]  # dimension of the state vector
n_action = env.action_space.shape[0]  # dimension of the action vector
a_limit = env.action_space.high[0]  # largest possible value of the continuous action

# Create network
net = DDPG(n_state=n_state, n_action=n_action, a_limit=a_limit, model_folder=model_folder, memory_size=memory_size, batch_size=batch_size, tau=tau, gamma=gamma, var=var)
net.load()

# Train
reward_list = []
for i in range(episode):
    s = env.reset()
    total_reward = 0
    for j in range(max_iter):
        # env.render()
        a = net.chooseAction(s)
        s_, r, finish, info = env.step(a)

        # Store the transition in the replay memory and update the parameters.
        # The reward is divided by 10 before it is stored.
        # NOTE(review): within the visible part of the loop `s` is never
        # advanced to `s_`, and `total_reward` / `reward_list` are not updated
        # -- confirm this happens further down.
        net.store_path(s, a, r / 10, s_)
        net.update()
def master_loop(env):
    """Serve actions to MPI workers and train a DDPG agent on their transitions.

    The loop receives messages from any worker until ``MAX_EPISODES`` episodes
    have finished:

    * ``REQ_ACTION``: the payload is passed to ``ddpg.choose_action`` and the
      reply ``(action, total_eps, total_step)`` is sent back with tag
      ``RSP_ACTION``.
    * ``OBS_DATA``: the payload is the tuple
      ``(s, a, r, s_, done, ep_reward, ep_step)``; it is stored in the replay
      memory and, once ``ddpg.pointer > LEARN_START``, a learning step is run
      on every third received transition.

    Every finished episode is logged; every 500 episodes the model is saved
    and throughput statistics are logged.

    Relies on names defined outside this function: ``comm``, ``MPI``, ``DDPG``,
    the message tags and the hyper-parameter constants.

    Args:
        env: object providing ``get_s_dim()``, ``get_a_dim()``,
            ``get_a_high()`` and ``get_a_low()``.
    """
    # Send INFO-level records of the root logger to ./log/test.log.
    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fileHandler = logging.FileHandler('./log/test.log')
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)
    logger.setLevel(logging.INFO)

    s_dim = env.get_s_dim()
    a_dim = env.get_a_dim()
    a_high = env.get_a_high()
    a_low = env.get_a_low()
    # print(a_bound)
    print("s_dim: {}, a_dim{}, a_high:{}, a_low:{}".format(
        s_dim, a_dim, a_high, a_low))

    ddpg = DDPG(a_dim, s_dim, a_high, a_low, lr_a=LR_A, lr_c=LR_C,
                gamma=GAMMA, tau=TAU, rpm_size=MEMORY_CAPACITY,
                batch_size=BATCH_SIZE)

    status = MPI.Status()
    reset_time = time.time()  # start of the current statistics window

    total_eps = 0   # episodes finished since start
    total_step = 0  # transitions received since start
    n_step = 0      # transitions received in the current statistics window
    n_eps = 0       # episodes finished in the current statistics window

    max_reward = -9999
    max_reward_rank = 0  # worker rank that produced max_reward

    ddpg.load()

    while total_eps < MAX_EPISODES:
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source = status.Get_source()
        tag = status.Get_tag()

        if tag == REQ_ACTION:
            # action = env.action_space.sample()
            action = ddpg.choose_action(data)
            comm.send((action, total_eps, total_step), dest=source,
                      tag=RSP_ACTION)
        elif tag == OBS_DATA:
            n_step += 1
            total_step += 1
            (s, a, r, s_, done, ep_reward, ep_step) = data

            is_done = 1.0 if done else 0.0
            ddpg.store_transition(s, a, r, s_, is_done)

            if ddpg.pointer > LEARN_START and total_step % 3 == 0:
                ddpg.learn()

            if done:
                total_eps += 1
                # Count the episode in the current window as well; without
                # this the reported eps/min is always zero.
                n_eps += 1

                if ep_reward > max_reward:
                    max_reward = ep_reward
                    max_reward_rank = source

                episode_msg = "eps: {:>8}, worker: {:>3}, ep_reward:{:7.4f}, max:{:7.4f}/{:>3}, step:{:4}".format(
                    total_eps, source, ep_reward, max_reward, max_reward_rank, ep_step)
                # print(episode_msg)
                logger.info(episode_msg)

                if total_eps % 500 == 0:
                    ddpg.save(total_eps)

                    interval = time.time() - reset_time
                    stats_msg = "# total_step: {:>8} ,total_eps: {:>6} eps/min: {:>6}, frame/sec: {:>6}".format(
                        total_step, total_eps, n_eps / interval * 60, n_step / interval)
                    # print(stats_msg)
                    logger.info(stats_msg)

                    # Start a new statistics window.
                    n_step = 0
                    n_eps = 0
                    reset_time = time.time()
class PytorchRLTemplateAgent:
    """DDPG-based agent that maps camera frames to PWM wheel commands.

    The constructor only records the loading options; the preprocessor and
    model are built later, in ``init``.
    """

    def __init__(self, load_model: bool, model_path: Optional[str]):
        """Remember whether, and under which name, to load a saved model.

        Args:
            load_model: when truthy, ``init`` calls ``model.load``.
            model_path: name handed to ``model.load``; any falsy value falls
                back to ``"model"``.
        """
        self.load_model = load_model
        self.model_path = model_path

    def init(self, context: Context):
        """Check for a GPU, build the preprocessor and model, optionally load it."""
        self.check_gpu_available(context)
        logger.info("PytorchRLTemplateAgent init")

        # Deferred import: ``model`` is only loaded once init() runs.
        from model import DDPG

        self.preprocessor = DTPytorchWrapper()
        network_kwargs = dict(
            state_dim=self.preprocessor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        self.model = DDPG(**network_kwargs)

        # Placeholder frame; replaced by on_received_observations().
        self.current_image = np.zeros((640, 480, 3))

        if self.load_model:
            logger.info("Pytorch Template Agent loading models")
            self.model.load(self.model_path or "model", "models", for_inference=True)

        logger.info("PytorchRLTemplateAgent init complete")

    def check_gpu_available(self, context: Context):
        """Log CUDA availability.

        When CUDA is unavailable the decision is delegated to
        ``no_hardware_GPU_available(context)``; otherwise the current device
        is described in the log.
        """
        import torch

        available = torch.cuda.is_available()
        context.info(f"torch.cuda.is_available = {available!r}")
        context.info("init()")

        if not available:
            no_hardware_GPU_available(context)
            return

        i = torch.cuda.current_device()
        count = torch.cuda.device_count()
        name = torch.cuda.get_device_name(i)
        context.info(f"device {i} of {count}; name = {name!r}")

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and keep it as the current image."""
        rgb_frame = jpg2rgb(data.camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(rgb_frame)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to ``float``."""
        # A shape check that would re-run the preprocessor is left disabled:
        # if observation.shape != self.preprocessor.transposed_shape:
        #     observation = self.preprocessor.preprocess(observation)
        return self.model.predict(observation).astype(float)

    def on_received_get_commands(self, context: Context):
        """Compute wheel commands from the current image and write them out."""
        raw_left, raw_right = self.compute_action(self.current_image)

        # Keep both motor values inside [-1, 1].
        left = float(np.clip(raw_left, -1, +1))
        right = float(np.clip(raw_right, -1, +1))

        # The same all-zero RGB value is used for each of the five LED arguments.
        zero_rgb = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(zero_rgb, zero_rgb, zero_rgb, zero_rgb, zero_rgb)
        wheels = PWMCommands(motor_left=left, motor_right=right)

        context.write("commands", DB20Commands(wheels, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info("finish()")
class PytorchRLTemplateAgent:
    """DDPG-based agent that maps camera frames to PWM wheel commands.

    The constructor does nothing; the preprocessor and model are built in
    ``init``, which also receives the model-loading options.
    """

    def __init__(self):
        pass

    def init(self, context: Context, load_model=False, model_path=None):
        """Check for a GPU, build the preprocessor and model, optionally load it.

        Args:
            context: object used for logging.
            load_model: when truthy, call ``model.load`` after construction.
            model_path: name handed to ``model.load``; any falsy value falls
                back to ``"model"``.

        Raises:
            RuntimeError: from ``check_gpu_available`` when a GPU is required
                but CUDA is not available.
        """
        self.check_gpu_available(context)
        logger.info('PytorchRLTemplateAgent init')

        self.preprocessor = DTPytorchWrapper()
        self.model = DDPG(state_dim=self.preprocessor.shape, action_dim=2,
                          max_action=1, net_type="cnn")
        # Placeholder frame; replaced by on_received_observations().
        self.current_image = np.zeros((640, 480, 3))

        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            fp = model_path if model_path else "model"
            self.model.load(fp, "models", for_inference=True)

        logger.info('PytorchRLTemplateAgent init complete')

    def check_gpu_available(self, context: Context):
        """Log CUDA availability and fail if a GPU is required but missing.

        A GPU counts as required when the ``AIDO_REQUIRE_GPU`` environment
        variable is set to any value.

        Raises:
            RuntimeError: CUDA is unavailable and ``AIDO_REQUIRE_GPU`` is set.
        """
        available = torch.cuda.is_available()
        req = os.environ.get('AIDO_REQUIRE_GPU', None)
        context.info(
            f'torch.cuda.is_available = {available!r} AIDO_REQUIRE_GPU = {req!r}'
        )
        context.info('init()')

        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f'device {i} of {count}; name = {name!r}')
        elif req is not None:
            msg = 'I need a GPU; bailing.'
            context.error(msg)
            # RuntimeError rather than a bare Exception: callers can catch the
            # failure specifically, and ``except Exception`` still works.
            raise RuntimeError(msg)

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and keep it as the current image."""
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(obs)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to ``float``."""
        # A shape check that would re-run the preprocessor is left disabled:
        # if observation.shape != self.preprocessor.transposed_shape:
        #     observation = self.preprocessor.preprocess(observation)
        action = self.model.predict(observation)
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        """Compute wheel commands from the current image and write them out."""
        pwm_left, pwm_right = self.compute_action(self.current_image)

        # Keep both motor values inside [-1, 1].
        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        # The same all-zero RGB value is used for each of the five LED arguments.
        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = DB20Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')