def __init__(self, te=None, ts=None, c=None, beta=None): self.te = TrafficEmulator() if te is None else te self.ts = TrafficServer() if ts is None else ts self.c = DummyController() if c is None else c self.epoch = 0 self.last_reward = None self.last_cost = None self.BETA = beta
class Emulation:
    """Step-by-step loop tying a traffic emulator, a traffic server and a
    controller together.

    Each call to ``step`` pulls traffic from the emulator, lets the server
    turn it into an observation, asks the controller for a control, and then
    records the resulting cost and reward for use in the next step.
    """

    def __init__(self, te=None, ts=None, c=None, beta=None):
        """Store the collaborators and reset the running state.

        Args:
            te: Traffic emulator; defaults to ``TrafficEmulator()``.
            ts: Traffic server; defaults to ``TrafficServer()``.
            c: Controller; defaults to ``DummyController()``.
            beta: Optional weight. When given, the reward fed to the
                controller is ``beta * reward - (1 - beta) * cost``; when
                None it is simply ``reward - cost``.
        """
        self.te = TrafficEmulator() if te is None else te
        self.ts = TrafficServer() if ts is None else ts
        self.c = DummyController() if c is None else c

        self.epoch = 0
        self.last_reward = None
        self.last_cost = None

        self.BETA = beta

    def system_reward_(self):
        """Combine the previous step's reward and cost into one scalar.

        Returns:
            None if no previous reward/cost pair exists yet, otherwise
            ``last_reward - last_cost`` (unweighted) or
            ``BETA * last_reward - (1 - BETA) * last_cost`` (weighted).
        """
        if self.last_reward is None or self.last_cost is None:
            return None
        if self.BETA is None:
            return self.last_reward - self.last_cost
        return self.BETA * self.last_reward - (1 - self.BETA) * self.last_cost

    def step(self):
        """Run one observe -> control -> reward cycle.

        Returns:
            ``(observation, control, cost, reward)`` on success, or None
            when no observation is available. In the None case the epoch
            counter and the stored reward/cost are left untouched.
        """
        # Single-argument print(...) calls produce identical output under
        # the Python 2 print statement and the Python 3 print function.
        system_reward = self.system_reward_()
        print("Last reward: {}".format(system_reward))

        observation = self.get_observation_()
        print("Observation: {}".format(observation))
        if observation is None:
            print("Run out of data, please reset!")
            return None

        control, update_result = self.c.observe_and_control(
            observation=observation, last_reward=system_reward)
        print("Control: {}, Agent update: {}".format(control, update_result))

        cost, reward = self.control_and_reward_(control=control)
        print("Cost: {}, Reward: {}".format(cost, reward))

        # Remember this step's outcome; it becomes the controller's
        # feedback on the next call.
        self.last_cost = cost
        self.last_reward = reward
        self.epoch += 1

        return observation, control, cost, reward

    def reset(self):
        """Reset all three collaborators and clear the running state."""
        self.te.reset()
        self.ts.reset()
        self.c.reset()
        self.epoch = 0
        self.last_reward = None
        self.last_cost = None

    def get_observation_(self):
        """Generate traffic and have the server turn it into an observation.

        Returns:
            The server's observation, or None when the emulator returns
            None from ``generate_traffic``.
        """
        traffic_df = self.te.generate_traffic()
        if traffic_df is None:
            print("Run out of data, please reset environment!")
            return None
        return self.ts.observe(traffic_df=traffic_df)

    def control_and_reward_(self, control):
        """Apply ``control`` on the server and collect the emulator's reward.

        Args:
            control: Control value passed to the server.

        Returns:
            ``(cost, reward)``: cost as reported by the server, reward as
            reported by the emulator for the service the server produced.
        """
        service_df, cost = self.ts.get_service_and_cost(control=control)
        reward = self.te.serve_and_reward(service_df=service_df)
        return cost, reward
class Emulation:
    """Step-by-step loop tying a traffic emulator, a traffic server and a
    controller together.

    Each call to ``step`` pulls traffic from the emulator, lets the server
    turn it into an observation, asks the controller for a control, and then
    records the resulting cost and reward for use in the next step.
    """

    def __init__(self, te=None, ts=None, c=None, beta=None):
        """Store the collaborators and reset the running state.

        Args:
            te: Traffic emulator; defaults to ``TrafficEmulator()``.
            ts: Traffic server; defaults to ``TrafficServer()``.
            c: Controller; defaults to ``DummyController()``.
            beta: Optional weight. When given, the reward fed to the
                controller is ``beta * reward + (1 - beta) * cost``; when
                None it is simply ``reward + cost``.
        """
        self.te = TrafficEmulator() if te is None else te
        self.ts = TrafficServer() if ts is None else ts
        self.c = DummyController() if c is None else c

        self.epoch = 0
        self.last_reward = None
        self.last_cost = None

        self.BETA = beta

    def system_reward_(self):
        """Combine the previous step's reward and cost into one scalar.

        Cost is *added* to reward here.
        NOTE(review): presumably the server reports cost as a non-positive
        number -- confirm against ``get_service_and_cost``.

        Returns:
            None if no previous reward/cost pair exists yet, otherwise
            ``last_reward + last_cost`` (unweighted) or
            ``BETA * last_reward + (1 - BETA) * last_cost`` (weighted).
        """
        if self.last_reward is None or self.last_cost is None:
            return None
        if self.BETA is None:
            return self.last_reward + self.last_cost
        return self.BETA * self.last_reward + (1 - self.BETA) * self.last_cost

    def step(self):
        """Run one observe -> control -> reward cycle.

        Returns:
            ``(observation, control, cost, reward)`` on success, or None
            when no observation is available. In the None case the epoch
            counter and the stored reward/cost are left untouched.
        """
        # Each log line is emitted with one single-argument print(...) call.
        # That yields the same text as a "prefix," + "message" pair of
        # Python 2 print statements and also runs under Python 3.
        system_reward = self.system_reward_()
        print("Emulation.step(): last reward: {}".format(system_reward))

        observation = self.get_observation_()
        print("Emulation.step(): observation: {}".format(observation))
        if observation is None:
            print("Emulation.step(): run out of data, please reset!")
            return None

        control, update_result = self.c.observe_and_control(
            observation=observation, last_reward=system_reward)
        print("Emulation.step(): control: {}, agent update: {}".format(
            control, update_result))

        cost, reward = self.control_and_reward_(control=control)
        print("Emulation.step(): cost: {}, reward: {}".format(cost, reward))

        # Remember this step's outcome; it becomes the controller's
        # feedback on the next call.
        self.last_cost = cost
        self.last_reward = reward
        self.epoch += 1

        return observation, control, cost, reward

    def reset(self):
        """Reset all three collaborators and clear the running state."""
        self.te.reset()
        self.ts.reset()
        self.c.reset()
        self.epoch = 0
        self.last_reward = None
        self.last_cost = None

    def get_observation_(self):
        """Generate traffic and have the server turn it into an observation.

        Returns:
            The server's observation, or None when the emulator returns
            None from ``generate_traffic``.
        """
        traffic_df = self.te.generate_traffic()
        if traffic_df is None:
            # The log prefix names step() even though it is printed here;
            # the text is kept exactly as it was.
            print("Emulation.step(): run out of data, please reset environment!")
            return None
        return self.ts.observe(traffic_df=traffic_df)

    def control_and_reward_(self, control):
        """Apply ``control`` on the server and collect the emulator's reward.

        Args:
            control: Control value passed to the server.

        Returns:
            ``(cost, reward)``: cost as reported by the server, reward as
            reported by the emulator for the service the server produced.
        """
        service_df, cost = self.ts.get_service_and_cost(control=control)
        reward = self.te.serve_and_reward(service_df=service_df)
        return cost, reward