class ModelTrainer(object):
    """Drive a Star simulation model through the Bonsai SDK event loop.

    Wraps a simulator connection (``sim``) and a :class:`Star` model,
    translating SDK events (episode start / simulate / episode finish /
    finished) into calls on the Star instance while tracking per-episode
    metrics (reward, terminal flag, iteration count).
    """

    def __init__(self, sim, predict=False):
        # `modeltype` and `log_iterations` are module-level settings defined
        # elsewhere in this file.
        self._sim = sim
        self.episode_count = 0
        self.reset_iteration_metrics()
        print('model type that is being sent to the star is:', modeltype)
        time.sleep(1)
        self.star = Star(predict, modeltype=modeltype)
        self.star.logger = sim.results_logger
        if log_iterations:
            # Emit the CSV header row once, up front.
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            bonsai_tools.log_observations_columns(self.star.logger,
                                                  self.logged_observations)

    def episode_start(self, event):
        """Reset the simulation and populate ``event`` with the initial state."""
        # NOTE(review): this guard looks inverted -- it reads the private
        # _impl._sim_id only when the public sim_id is missing/-1, which
        # contradicts the commented-out check below. Confirm against the SDK.
        if getattr(self._sim, 'sim_id', -1) == -1:
            self.sim_id = self._sim._impl._sim_id
        #if self.sim_id != -1:
        #    print('SimID', self.sim_id)
        self.start_episode()
        self.episode_count += 1
        event.initial_state = self.star.get_state()
        event.terminal = self.star.get_terminal(event.initial_state)
        # The initial reward is an arbitrary value since there are no actions
        # taken by BRAIN in the initial state.
        event.reward = 0
        if log_iterations:
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            bonsai_tools.log_iteration(self.star.logger,
                                       self.logged_observations)

    def run(self):
        """Process one SDK event.

        Returns:
            bool: False when the session is finished (or no event arrived),
            True to keep pumping events.
        """
        event = self._sim.get_next_event()
        if isinstance(event, EpisodeStartEvent):
            log.event("Episode Start Train")
            self.episode_start(event)
        elif isinstance(event, SimulateEvent):
            # Receive the action from the BRAIN as event.action, run the
            # simulation one step and return the state, action, and reward
            # to the BRAIN.
            log.event("Simulate")
            self.iteration_count += 1
            self.action = event.action
            self.star.set_action(self.action)
            event.state = self.star.get_state()
            event.terminal = self.star.get_terminal(event.state)
            event.reward = self.star.get_reward(event.state, event.terminal)
            #print(event.state)
            self.reward = event.reward
            self.terminal = event.terminal
            self.episode_reward += event.reward
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            if log_iterations:
                bonsai_tools.log_iteration(self.star.logger,
                                           self.logged_observations)
            else:
                bonsai_tools.print_progress(self.logged_observations)
        elif isinstance(event, EpisodeFinishEvent):
            # The episode is terminal. Finish the episode.
            log.event("Episode Finish")
            print("episode count: {}, iteration count: {}, episode reward: {:6.2f}".format(
                self.episode_count, self.iteration_count, self.episode_reward))
        elif isinstance(event, FinishedEvent):
            log.event("Finished")
            return False
        elif event is None:
            return False
        return True

    def start_episode(self, config=None):
        """Reset the Star model and the per-episode metrics."""
        self.star.reset()
        self.reset_iteration_metrics()

    def reset_iteration_metrics(self):
        """Executed once every start of episode."""
        self.reward = 0
        self.terminal = False
        self.episode_reward = 0.0
        self.iteration_count = 0

    def update_logged_observations(self, logged_observations):
        """Merge trainer bookkeeping into a copy of ``logged_observations``.

        Keys already present in ``logged_observations`` take precedence over
        the trainer-level metrics. The input dict is not mutated.
        """
        updated_observations = {
            'episode_count': self.episode_count,
            'iteration_count': self.iteration_count,
            'terminal': self.terminal,
            'reward': self.reward,
            'episode_reward': self.episode_reward
        }
        #updated_observations.update(self.action)
        updated_observations.update(logged_observations)
        return updated_observations
class ModelTrainer(object):
    """Drive a Star simulation model through the Bonsai SDK event loop.

    Variant that also samples host CPU and virtual-memory utilisation (via
    ``psutil``) into every logged observation row.

    NOTE(review): this re-declares ``ModelTrainer`` -- it shadows the earlier
    class of the same name in this file. Confirm which definition is intended
    to win, or rename one of them.
    """

    def __init__(self, sim, predict=False):
        # `log_iterations` is a module-level setting defined elsewhere in
        # this file.
        self._sim = sim
        self.episode_count = 0
        self.reset_iteration_metrics()
        self.star = Star(predict)
        self.star.logger = sim.results_logger
        if log_iterations:
            # Emit the CSV header row once, up front.
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            bonsai_tools.log_observations_columns(self.star.logger,
                                                  self.logged_observations)

    def episode_start(self, event):
        """Reset the simulation and populate ``event`` with the initial state.

        Check https://docs.bons.ai/references/library-reference.html#event-class
        for SDK event class documentation from Product.
        """
        # NOTE(review): this guard looks inverted -- it reads the private
        # _impl._sim_id only when the public sim_id is missing/-1, which
        # contradicts the commented-out check below. Confirm against the SDK.
        if getattr(self._sim, 'sim_id', -1) == -1:
            self.sim_id = self._sim._impl._sim_id
        #if self.sim_id != -1:
        #    print('SimID', self.sim_id)
        self.start_episode()
        self.episode_count += 1
        event.initial_state = self.star.get_state()
        event.terminal = self.star.get_terminal(event.initial_state)
        # The initial reward is an arbitrary value since there are no actions
        # taken by BRAIN in the initial state.
        event.reward = 0
        if log_iterations:
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            bonsai_tools.log_iteration(self.star.logger,
                                       self.logged_observations)

    def run(self):
        """Process one SDK event.

        Returns:
            bool: False when the session is finished (or no event arrived),
            True to keep pumping events.
        """
        event = self._sim.get_next_event()
        if isinstance(event, EpisodeStartEvent):
            log.event("Episode Start Train")
            self.episode_start(event)
        elif isinstance(event, SimulateEvent):
            # Receive the action from the BRAIN as event.action, run the
            # simulation one step and return the state, action, and reward
            # to the BRAIN.
            log.event("Simulate")
            self.iteration_count += 1
            self.action = event.action
            self.star.set_action(self.action)
            event.state = self.star.get_state()
            event.terminal = self.star.get_terminal(event.state)
            event.reward = self.star.get_reward(event.state, event.terminal)
            #print(event.state)
            self.reward = event.reward
            self.terminal = event.terminal
            self.episode_reward += event.reward
            self.logged_observations = self.star.define_logged_observations()
            self.logged_observations = self.update_logged_observations(
                self.logged_observations)
            if log_iterations:
                bonsai_tools.log_iteration(self.star.logger,
                                           self.logged_observations)
            else:
                bonsai_tools.print_progress(self.logged_observations)
        elif isinstance(event, EpisodeFinishEvent):
            # The episode is terminal. Finish the episode.
            log.event("Episode Finish")
            print(
                "episode count: {}, iteration count: {}, episode reward: {:6.2f}"
                .format(self.episode_count, self.iteration_count,
                        self.episode_reward))
        elif isinstance(event, FinishedEvent):
            log.event("Finished")
            return False
        elif event is None:
            return False
        return True

    def start_episode(self, config=None):
        """Reset the simulator configuration and the per-episode metrics."""
        self.star.simulator_reset_config()
        self.reset_iteration_metrics()

    def reset_iteration_metrics(self):
        """Executed once every start of episode."""
        self.reward = 0
        self.terminal = False
        self.episode_reward = 0.0
        self.iteration_count = 0
        # Baseline host-utilisation samples; refreshed on every logged row.
        self._cpu_pc = psutil.cpu_percent()
        self._vmem = psutil.virtual_memory().percent

    def update_logged_observations(self, logged_observations):
        """Merge trainer bookkeeping and host metrics into a copy of
        ``logged_observations``.

        Keys already present in ``logged_observations`` take precedence.
        The input dict is not mutated.
        """
        self._cpu_pc = psutil.cpu_percent()
        self._vmem = psutil.virtual_memory().percent
        updated_observations = {
            'episode_count': self.episode_count,
            'iteration_count': self.iteration_count,
            'terminal': self.terminal,
            'reward': self.reward,
            'episode_reward': self.episode_reward,
            'cpu_pc': self._cpu_pc,
            'vmem': self._vmem
        }
        #updated_observations.update(self.action)
        updated_observations.update(logged_observations)
        return updated_observations
if __name__ == "__main__":
    # Standalone integration check: step the Star model through fixed-length
    # episodes with random actions and plot every logged observation.
    # `parser`, `simple_brain_controller`, `pd`, `np` and `plt` come from
    # module-level imports/definitions elsewhere in this file.
    star = Star()
    episode_length = 284
    for _ in range(0, parser.num_samples):
        state = star.get_state()
        # Reset once per sample/episode, not once per step.
        star.simulator_reset_config()
        observation_rows = []
        for i in range(episode_length):
            # The BRAIN controller output is recorded for comparison only;
            # the action actually applied is a random on/off HVAC toggle.
            brain_action = simple_brain_controller(state)
            sim_action = {'hvacON': np.random.randint(0, 2)}
            star.set_action(sim_action)
            terminal = star.get_terminal(state)
            reward = star.get_reward(state, terminal)
            state = star.get_state()
            print(state)
            observations = star.model.simulator_get_observations()
            observations.update(state)
            observations.update({'terminal': terminal})
            observations.update({'reward': reward})
            observations.update({'brain_action': brain_action})
            observations.update({'sim_action': sim_action})
            # Accumulate rows in a list; DataFrame.append was removed in
            # pandas 2.x and rebuilding the frame per row is quadratic.
            observation_rows.append(observations)
            print(observations)
        observations_df = pd.DataFrame(observation_rows)
    # Plots the last sample's episode (observations_df from the final
    # outer-loop iteration).
    observations_df.plot(title='simulation integration plot')
    plt.xlabel('iteration count')
    plt.ylabel('observations')