def __init__(self, brain, name):
    """
    Set up a Simulator bound to the given BRAIN.

    Arguments:
        brain: The BRAIN to train or predict against.
        name:  The Simulator name. It has to be identical to the
               simulator name used in Inkling.
    """
    self.name, self.brain = name, brain

    # The writer attribute must exist before _construct_writer() runs.
    self.writer = None
    self._construct_writer()

    # Each simulator gets a private event loop so that heartbeat tasks
    # of different simulators cannot interfere. A loop can sit idle for
    # an arbitrarily long time - should sim B wake a shared loop while
    # sim A is not being read, sim A's heartbeat disconnect would trip.
    loop = asyncio.new_event_loop()
    self._ioloop = loop
    self._impl = Simulator_WS(brain, self, name, loop)

    # statistics
    self.episode_reward = self.episode_count = self.iteration_count = 0
    self._episode_rate = _RateCounter()
    # NOTE: _iteration_rate accumulates across episodes; it is not
    # a per-episode figure.
    self._iteration_rate = _RateCounter()
    self._reset_rate_counter = True
def __init__(self, brain, name):
    """
    Set up a Simulator bound to the given BRAIN.

    Arguments:
        brain: The BRAIN to train or predict against.
        name:  The Simulator name. It has to be identical to the
               simulator name used in Inkling.
    """
    self.name, self.brain = name, brain

    # The writer attribute must exist before _construct_writer() runs.
    self.writer = None
    self._construct_writer()

    self._ioloop = IOLoop()
    self._impl = Simulator_WS(brain, self, name)

    # statistics
    self.episode_reward = self.episode_count = self.iteration_count = 0
    self._episode_rate = _RateCounter()
    # NOTE: _iteration_rate accumulates across episodes; it is not
    # a per-episode figure.
    self._iteration_rate = _RateCounter()
class Simulator(object):
    """
    This class is used to interface with the server while training or
    running predictions against a BRAIN. It is an abstract base class,
    and to use it a developer must create a subclass.

    The `Simulator` class is closely related to the Inkling file that
    is associated with the BRAIN. The name used to construct `Simulator`
    must match the name of the simulator in the Inkling file.

    There are two main methods that you must override, `episode_start`
    and `simulate`. At the start of a session, `episode_start` is called
    once, then `simulate` is called repeatedly until the `terminal` flag is
    returned as `True` or the next `episode_start` interrupts the
    simulation.

    Attributes:
        brain:              The BRAIN to connect to.
        name:               The name of this Simulator. Must match
                            simulator in Inkling.
        objective_name:     The name of the current objective for an
                            episode.
        episode_reward:     Cumulative reward for this episode so far.
        episode_count:      Number of completed episodes since sim launch.
        episode_rate:       Episodes per second. R/O
        iteration_count:    Number of iterations for this episode.
        iteration_rate:     Iterations per second

    Example Inkling:
        simulator my_simulator(Config)
            action (Action)
            state (State)
        end

    Example Code:
        class MySimulator(bonsai_ai.Simulator):
            def __init__(self, brain, name):
                super().__init__(brain, name)
                # your sim init code goes here.

            def episode_start(self, parameters=None):
                # your reset/init code goes here.
                return my_state

            def simulate(self, action):
                # your simulation stepping code goes here.
                return (my_state, my_reward, is_terminal)

            def episode_finish(self):
                print('Episode reward', self.episode_reward)

        ...

        config = bonsai_ai.Config(sys.argv)
        brain = bonsai_ai.Brain(config)
        sim = MySimulator(brain, "my_simulator")

        while sim.run():
            continue
    """

    # Maps a record format (as reported by brain.config.record_format)
    # to the writer class used for structured logging.
    WRITERS = {
        '.json': JSONWriter,
        '.csv': CSVWriter
    }

    def __init__(self, brain, name):
        """
        Constructs the Simulator class.

        Arguments:
            brain: The BRAIN you wish to train or predict against.
            name:  The Simulator name. Must match the name in Inkling.
        """
        self.name = name
        self.brain = brain

        # self.writer must exist before _construct_writer() runs, since
        # that helper only assigns it when recording is enabled.
        self.writer = None
        self._construct_writer()

        # NOTE(review): this picks up the current event loop rather than
        # creating a private one - confirm that is intended when several
        # simulators live in the same thread.
        self._ioloop = asyncio.get_event_loop()
        self._impl = Simulator_WS(brain, self, name)

        # statistics
        self.episode_reward = 0
        self.episode_count = 0
        self._episode_rate = _RateCounter()
        self.iteration_count = 0

        # NOTE: _iteration_rate is accumulative, not per episode
        self._iteration_rate = _RateCounter()

        # When True, both rate counters are reset at the next episode
        # start (see _on_episode_start).
        self._reset_rate_counter = True

    def _construct_writer(self):
        """
        Instantiate the record writer if recording is enabled.

        Raises:
            RuntimeError: if recording is enabled but the configured
                record format has no entry in WRITERS.
        """
        def raise_rte(fname):
            # Stand-in "constructor" used when the format is unsupported:
            # it receives the record file name and raises.
            raise RuntimeError(
                """
                Record file name must include a supported
                extension (.json|.csv): {}
                """.format(fname))

        if self.brain.config.record_enabled:
            self.writer = self.WRITERS.get(
                self.brain.config.record_format, raise_rte)(
                    self.brain.config.record_file)

    def __repr__(self):
        """ Return a JSON-like, human readable summary of the Simulator. """
        # Adjacent literals are concatenated before .format() is applied.
        return (
            '{{'
            'name: {self.name!r}, '
            'objective_name: {self._impl.objective_name!r}, '
            'predict: {self.predict!r}, '
            'brain: {self.brain!r}, '
            'episode_reward: {self.episode_reward!r}, '
            'episode_count: {self.episode_count!r}, '
            'episode_rate: {self.episode_rate!r}, '
            'iteration_count: {self.iteration_count!r}, '
            'iteration_rate: {self.iteration_rate!r}'
            '}}'
        ).format(self=self)

    @property
    def predict(self):
        """ True if simulation is configured for prediction. """
        return self.brain.config.predict

    @property
    def objective_name(self):
        """ Current episode objective name. """
        return self._impl.objective_name

    @property
    def episode_rate(self):
        """ Episodes per second, truncated to an int. """
        return int(self._episode_rate.rate)

    @property
    def iteration_rate(self):
        """ Iterations per second, truncated to an int. """
        return int(self._iteration_rate.rate)

    def episode_start(self, episode_config):
        """
        Called at the start of each new episode. This callback passes in a
        set of initial parameters and expects an initial state in return
        for the simulator. Before this callback is called, the property
        `objective_name` will be updated to reflect the current objective
        for this episode.

        This call is where a simulation should be reset for the next round.

        Arguments:
            episode_config: A dict of config parameters defined in Inkling.

        Returns:
            A dictionary of the initial state of the simulation as defined
            in inkling.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.

        Example Inkling:
            schema Config
                UInt8 start_angle
            end

            schema State
                Float32 angle,
                Float32 velocity
            end

        Example Code:
            def episode_start(self, params):
                # training params are only passed in during training
                if self.predict == False:
                    print(self.objective_name)
                    self.angle = params.start_angle

                initial = {
                    "angle": self.angle,
                    "velocity": self.velocity,
                }
                return initial
        """
        raise NotImplementedError(
            'Abstract method episode_start() has not been implemented')

    def simulate(self, action):
        """
        This callback steps the simulation forward by a single step.
        It passes in the `action` to be taken, and expects the resulting
        `state`, `reward` for the current `objective`, and a `terminal`
        flag used to signal the end of an episode. Note that an episode may
        be reset prematurely by the backend during training.

        For a multi-lesson curriculum, the `objective_name` will change
        from episode to episode. In this case ensure that the simulator is
        returning the correct reward for the different lessons.

        Returning `True` for the `terminal` flag signals the start of a
        new episode.

        Arguments:
            action: A dict as defined in Inkling of the action to take.

        Returns:
            A tuple of (state, reward, terminal).
            state:    A dict as defined in Inkling of the sim state.
            reward:   A real number describing the reward for this step.
            terminal: True if the simulation has ended or terminated.
                      False if it should continue.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.

        Example Inkling:
            schema Action
                Int8{0, 1} delta
            end

        Example Code:
            def simulate(self, action):
                velocity = velocity - action.delta;
                terminal = (velocity <= 0.0)

                # reward is only needed during training
                if self.predict == False:
                    reward = reward_for_objective(self.objective_name)

                state = {
                    "velocity": self.velocity,
                    "angle": self.angle,
                }
                return (state, reward, terminal)
        """
        raise NotImplementedError(
            'Abstract method simulate() has not been implemented')

    def episode_finish(self):
        """
        This callback is called at the end of every episode before the
        next episode_start(). You can use it to do post episode cleanup
        or statistics reporting.
        """
        pass

    def standby(self, reason):
        """
        Log `reason` at info level. Subclasses may override.

        Arguments:
            reason: The message to log.
        """
        log.info(reason)

    @property
    def record_file(self):
        """
        Get or set the name of the file to which analytics logs should
        be written. If this property is set during simulation, subsequent
        log entries will be written to the new file.

        Reads as None, and ignores assignment, when recording is not
        enabled.
        """
        if self.writer is not None:
            return self.writer.record_file
        return None

    @record_file.setter
    def record_file(self, new_file):
        if self.writer is not None:
            self.writer.record_file = new_file

    @property
    def sim_id(self):
        """
        ID for an active Simulator connection. Returns "" if this
        Simulator not yet connected to the MT service. Once a connection is
        made, this property persists until the next successful connection
        by this object.
        """
        current_id = self._impl._sim_id
        return str(current_id) if current_id else ''

    def enable_keys(self, keys, prefix=None):
        """
        Adds the given keys to the log schema for the active writer.
        If one is provided, the prefix will be prepended to those keys
        and they will appear as such in the resulting logs. If recording
        is not enabled, this method has no effect.

        You should enable any keys you expect to see in the logs.
        If you attempt to insert objects containing keys which have not
        been enabled, those keys will be silently ignored.

        Arguments:
            keys:   The keys to enable.
            prefix: Prepended to each item in `keys`.
                    As in `<prefix>.<key>`.

        Returns:
            None
        """
        if self.writer is not None:
            self.writer.enable_keys(keys, prefix)

    def record_append(self, obj, prefix=None):
        """
        This function adds the given dictionary to the currently buffered
        log line, prepending `prefix` to each key (`<prefix>.<key>`)
        before doing so. If recording is not enabled, this method has no
        effect.

        If a particular key is not enabled for the active writer, it will
        be silently ignored. This has no effect on the inclusion of other
        keys in the given dictionary.

        Arguments:
            obj:    The dictionary to add.
            prefix: Prepended to each key in `obj`.
                    As in `<prefix>.<key>`.
        """
        if self.writer is not None:
            self.writer.add(obj, prefix)

    def flush_record(self):
        """
        Flush the current record buffer, writing its contents to disk.
        This action is performed automatically at the end of every call
        to Simulator.run, but this flush_record allows event-driven
        simulator integrations to take advantage of structured recording
        functionality.
        """
        if self.writer is not None:
            self.writer.write()

    def _on_episode_start(self, episode_config):
        """
        Callback hook for episode_start, called by event dispatcher.

        Resets the per-episode counters, delegates to `episode_start`
        and records the initial state when recording is enabled.

        Returns:
            Whatever `episode_start` returned.
        """
        # update counters
        self.iteration_count = 0
        self.episode_reward = 0

        # The rate counters are only reset once after the flag is raised,
        # not at every episode.
        if self._reset_rate_counter:
            self._episode_rate.reset()
            self._iteration_rate.reset()
            self._reset_rate_counter = False

        init_state = self.episode_start(episode_config)
        if self.writer is not None:
            self._record_state(init_state, config=episode_config)
        return init_state

    def _on_simulate(self, action):
        """
        Callback hook for simulate, called by event dispatcher.

        Returns:
            A tuple (state, reward, terminal) where reward has been
            coerced to float and terminal to bool.
        """
        # update counters
        self._iteration_rate.update()
        self.iteration_count += 1

        # step
        state, reward, terminal = self.simulate(action)
        self.episode_reward += reward
        if self.writer is not None:
            self._record_state(state, action, reward, terminal)
        return state, float(reward), bool(terminal)

    def _on_episode_finish(self):
        """
        Callback hook for end of episode, called by event dispatcher.
        """
        # update counters
        self._episode_rate.update()
        self.episode_count += 1

        # userland callback
        self.episode_finish()

    def _record_state(self, state, action=None, reward=None,
                      terminal=None, config=None):
        """
        Add one step's worth of data to the active writer's buffer.

        Callers must make sure `self.writer` is not None.

        Arguments:
            state:    State to log under the `state` prefix.
            action:   Action to log under the `action` prefix.
                      Defaults to an empty dict.
            reward:   Reward for this step, if any.
            terminal: Terminal flag for this step, if any.
            config:   Episode config to log under the `config` prefix.
                      Defaults to an empty dict.
        """
        # None sentinels replace the former shared mutable `{}` defaults.
        if action is None:
            action = {}
        if config is None:
            config = {}

        self.writer.add(config, 'config')
        self.writer.add(action, 'action')
        self.writer.add(state, 'state')
        self.writer.add({
            'reward': reward,
            'terminal': terminal,
            'predict': self.predict,
            'time': self._now(),
            'simulator': self.name,
            'sim_id': self._impl._sim_id
        })
        self.writer.add({
            'episode_reward': self.episode_reward,
            'episode_count': self.episode_count,
            'episode_rate': self.episode_rate,
            'iteration_count': self.iteration_count,
            'iteration_rate': self.iteration_rate
        }, 'statistics')

    def _now(self):
        """ Current local time formatted as `YYYY-MM-DD HH:MM:SS`. """
        return datetime.fromtimestamp(
            time()).strftime("%Y-%m-%d %H:%M:%S")

    def close(self):
        """
        Closes websocket Connection.

        Cancels the pending receive handle, if there is one, then runs
        the connection's close() to completion on this simulator's loop.
        """
        if self._impl._receive_handle:
            self._impl._receive_handle.cancel()
        self._ioloop.run_until_complete(self._impl._sim_connection.close())

    def get_next_event(self):
        """
        Advance the SDK's internal state machine and return an event for
        processing.

        The connection is closed before returning (or propagating an
        exception) whenever no event was obtained or the event is a
        `FinishedEvent`.

        Returns:
            An instance of `Event`.

        Raises:
            The original exception wrapped by a `BonsaiClientError`.
            SimStateError: re-raised after being logged.

        Example:
            event = self.get_next_event()
            if isinstance(event, EpisodeStartEvent):
                # do something
            elif isinstance(event, SimulateEvent):
                # do something else
            elif isinstance(event, EpisodeFinishEvent):
                # book keeping
            else:
                # do nothing
        """
        # Stays None if fetching the event raises, which tells the
        # finally clause to close the connection.
        event = None
        try:
            event = self._ioloop.run_until_complete(
                self._impl.get_next_event())
        except KeyboardInterrupt:
            event = FinishedEvent()
        except BonsaiClientError as e:
            log.error(e)
            raise e.original_exception
        except BonsaiServerError as e:
            log.error(e)
            event = FinishedEvent()
        except SimStateError as e:
            log.error(e)
            raise
        finally:
            if event is None or isinstance(event, FinishedEvent):
                self.close()

        return event

    def run(self):
        """
        Main loop call for driving the simulation. Returns `False` when the
        simulation has finished or halted.

        The client should call this method in a `while` loop until it
        returns `False`. To run for prediction, `brain.config.predict` must
        return `True`.

        When the run did not succeed, the record writer (if any) and the
        connection are closed before returning or propagating.

        Raises:
            The original exception wrapped by a `BonsaiClientError`.
            SimStateError: re-raised after being logged.

        Example:
            sim = MySimulator(brain)

            if sim.predict:
                print("Predicting against ", brain.name,
                      " version ", brain.version)
            else:
                print("Training ", brain.name)

            while sim.run():
                continue
        """
        # Stays False if the step raises, which triggers cleanup below.
        success = False
        try:
            success = self._ioloop.run_until_complete(self._impl.run())
        except KeyboardInterrupt:
            pass
        except BonsaiClientError as e:
            log.error(e)
            raise e.original_exception
        except BonsaiServerError as e:
            log.error(e)
        except SimStateError as e:
            log.error(e)
            raise
        finally:
            if not success:
                # Close the connection even if closing the writer fails.
                try:
                    if self.writer is not None:
                        self.writer.close()
                finally:
                    self.close()

        return success