import os


# Logger and Constants are provided elsewhere in the project.
class UpgradeManager:
    def __init__(self):
        self.logger = Logger("UpgradeManager")

    def applyUpgrade(self, clientId, packageId, data):
        """
        Applies (installs) a package

        :param clientId: The own client id
        :param packageId: The id of the package
        :param data: The package file as a binary string
        """
        if not os.path.exists(Constants.CLIENT_PACKAGE_DIR):
            os.makedirs(Constants.CLIENT_PACKAGE_DIR)

        # Persist the received package to the package directory.
        fileName = "package-%d-%d.zip" % (clientId, packageId)
        with open("%s/%s" % (Constants.CLIENT_PACKAGE_DIR, fileName), "wb") as file:
            file.write(data)

        self.logger.log("Received package from server. File: %s" % fileName)

        # Fill the [dir] and [package] placeholders of the configured install command.
        command = Constants.CLIENT_PACKAGE_COMMAND.replace("[dir]", Constants.CLIENT_PACKAGE_DIR)
        command = command.replace("[package]", fileName)

        self.logger.log("Executing package command: %s" % command)
        os.system(command)
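# --- Illustrative usage sketch (not from the original source) ---
# Shows how applyUpgrade might be driven once a package has been received from the
# server. The client id, package id, and the local zip path below are hypothetical;
# Logger and Constants come from the surrounding project.
if __name__ == "__main__":
    manager = UpgradeManager()
    with open("example-package.zip", "rb") as f:   # hypothetical package file
        package_bytes = f.read()
    manager.applyUpgrade(clientId=1, packageId=42, data=package_bytes)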
def Serve(sock: socket, logger: Logger):
    # Read the WhoAmI (WMI) code sent by the peer and dispatch to the handler
    # registered for it; unknown codes are rejected.
    wmi = RecvWMI(sock)
    if wmi in WhoAmI:
        logger.addHost(*sock.getpeername(), 'is ' + WhoAmI[wmi])
        _wmi2func[wmi](sock)
    else:
        # noinspection PyStringFormat
        raise SPException('Invalid WMI %d received from host %s:%d'
                          % (wmi, *sock.getpeername()))
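# --- Illustrative usage sketch (not from the original source) ---
# A minimal accept loop that hands each incoming connection to Serve(). The port,
# the Logger construction, and the error handling are assumptions made here for
# illustration; RecvWMI, WhoAmI, _wmi2func, and SPException are defined elsewhere
# in the project. The stdlib socket module is aliased to avoid clashing with a
# possible `from socket import socket` in the original module.
import socket as socketlib

def serve_forever(port=5000):
    logger = Logger()   # assumed constructor; the real Logger may take arguments
    srv = socketlib.socket(socketlib.AF_INET, socketlib.SOCK_STREAM)
    srv.setsockopt(socketlib.SOL_SOCKET, socketlib.SO_REUSEADDR, 1)
    srv.bind(('', port))
    srv.listen()
    while True:
        conn, _ = srv.accept()
        try:
            Serve(conn, logger)
        except SPException as e:
            print(e)        # host sent an invalid WMI; drop the connection
        finally:
            conn.close()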
import cv2
import numpy as np


class On_policy_trainer:
    def __init__(self, env, test_env, algorithm, max_action, min_action, args):
        self.domain_type = args.domain_type
        self.env_name = args.env_name

        self.env = env
        self.test_env = test_env
        self.algorithm = algorithm

        self.max_action = max_action
        self.min_action = min_action

        self.discrete = args.discrete
        self.render = args.render
        self.max_step = args.max_step

        self.eval = args.eval
        self.eval_episode = args.eval_episode
        self.eval_step = args.eval_step

        self.episode = 0
        self.episode_reward = 0
        self.total_step = 0
        self.local_step = 0
        self.eval_num = 0

        self.train_mode = None
        if args.train_mode == 'offline':
            self.train_mode = self.offline_train
        elif args.train_mode == 'online':
            self.train_mode = self.online_train
        elif args.train_mode == 'batch':
            self.train_mode = self.batch_train
        assert self.train_mode is not None

        self.log = args.log
        self.model = args.model
        self.model_freq = args.model_freq
        self.buffer = args.buffer
        self.buffer_freq = args.buffer_freq

        if self.log == True:
            self.logger = Logger(env, algorithm, args, file=args.file, tensorboard=args.tensorboard,
                                 numpy=args.numpy, model=args.model, buffer=args.buffer)

    def offline_train(self, d, local_step):
        # Update only at the end of an episode.
        if d:
            return True
        return False

    def online_train(self, d, local_step):
        # Update at every environment step.
        return True

    def batch_train(self, d, local_step):  # VPG, TRPO, PPO only
        # Update at the end of an episode or once a full batch has been collected.
        if d or local_step == self.algorithm.batch_size:
            return True
        return False

    def evaluate(self):
        self.eval_num += 1
        episode = 0
        reward_list = []

        while True:
            if episode >= self.eval_episode:
                break
            episode += 1
            eval_reward = 0

            observation = self.test_env.reset()
            if 'ram' in self.env_name:  # Atari Ram state
                observation = observation / 255.
            done = False

            while not done:
                if self.render == True:
                    if self.domain_type in {'gym', "atari"}:
                        self.test_env.render()
                    else:
                        cv2.imshow("{}_{}".format(self.algorithm.name, self.test_env.unwrapped.spec.id),
                                   self.test_env.render(mode='rgb_array', height=240, width=320))
                        cv2.waitKey(1)

                action = self.algorithm.eval_action(observation)

                if self.discrete == False:
                    env_action = np.clip(action, -1, 1)
                else:
                    env_action = action

                next_observation, reward, done, _ = self.test_env.step(self.max_action * env_action)

                eval_reward += reward
                observation = next_observation

            reward_list.append(eval_reward)

        print("Eval | Average Reward {:.2f}, Max reward: {:.2f}, Min reward: {:.2f}, Stddev reward: {:.2f}".format(
            sum(reward_list) / len(reward_list), max(reward_list), min(reward_list), np.std(reward_list)))

        if self.log == True:
            self.logger.log('Reward/Test', sum(reward_list) / len(reward_list), self.eval_num, False)
            self.logger.log('Max Reward/Test', max(reward_list), self.eval_num, False)
            self.logger.log('Min Reward/Test', min(reward_list), self.eval_num, False)
            self.logger.log('Stddev Reward/Test', np.std(reward_list), self.eval_num, True)

    def run(self):
        while True:
            if self.total_step > self.max_step:
                print("Training finished")
                break

            self.episode += 1
            self.episode_reward = 0
            self.local_step = 0

            observation = self.env.reset()
            done = False

            while not done:
                self.local_step += 1
                self.total_step += 1

                if self.render == True:
                    if self.domain_type in {'gym', "atari"}:
                        self.env.render()
                    else:
                        cv2.imshow("{}_{}".format(self.algorithm.name, self.env.unwrapped.spec.id),
                                   self.env.render(mode='rgb_array', height=240, width=320))
                        cv2.waitKey(1)

                if 'ram' in self.env_name:  # Atari Ram state
                    observation = observation / 255.
                # log_prob is only produced by get_action(); the random-exploration
                # branch below presumably assumes training_start == 0 for on-policy
                # algorithms, since it leaves log_prob unset.
                if self.total_step <= self.algorithm.training_start:
                    action = self.env.action_space.sample()
                    next_observation, reward, done, _ = self.env.step(action)
                else:
                    action, log_prob = self.algorithm.get_action(observation)

                    if self.discrete == False:
                        env_action = np.clip(action, -1, 1)
                    else:
                        env_action = action

                    next_observation, reward, done, _ = self.env.step(self.max_action * env_action)

                # Time-limit truncation is not treated as a true terminal state.
                if self.local_step + 1 == self.env._max_episode_steps:
                    real_done = 0.
                else:
                    real_done = float(done)

                self.episode_reward += reward

                # Reward scaling applied only for Pendulum-v0.
                if self.env_name == 'Pendulum-v0':
                    reward = (reward + 8.1) / 8.1

                self.algorithm.buffer.add(observation, action, reward, next_observation, real_done, log_prob)
                observation = next_observation

                if self.total_step >= self.algorithm.training_start and self.train_mode(done, self.local_step):
                    loss_list = self.algorithm.train(self.algorithm.training_step)
                    if self.log == True:
                        for loss in loss_list:
                            self.logger.log(loss[0], loss[1], self.total_step, str(self.episode))

                if self.eval == True and self.total_step % self.eval_step == 0:
                    self.evaluate()

                if self.log == True:
                    if self.model == True and self.total_step % self.model_freq == 0:
                        self.logger.save_model(self.algorithm, step=self.total_step)
                    if self.buffer == True and self.total_step % self.buffer_freq == 0:
                        self.logger.save_buffer(buffer=self.algorithm.buffer, step=self.total_step)

            print("Train | Episode: {}, Reward: {:.2f}, Local_step: {}, Total_step: {},".format(
                self.episode, self.episode_reward, self.local_step, self.total_step))

            if self.log == True:
                self.logger.log('Reward/Train', self.episode_reward, self.episode, False)
                self.logger.log('Step/Train', self.local_step, self.episode, False)
                self.logger.log('Total Step/Train', self.total_step, self.episode, True)
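# --- Illustrative usage sketch (not from the original source) ---
# Shows how On_policy_trainer is typically wired together. The argparse.Namespace
# fields mirror the args.* attributes read in __init__, but their values here are
# illustrative guesses; `PPO` stands in for any algorithm object from this project
# that exposes get_action/eval_action/train, a `buffer`, and the training_* /
# batch_size attributes, and its constructor below is hypothetical.
import argparse
import gym

def build_and_run():
    args = argparse.Namespace(
        domain_type='gym', env_name='Pendulum-v0', discrete=False, render=False,
        max_step=1_000_000, eval=True, eval_episode=5, eval_step=10_000,
        train_mode='batch', log=False, model=False, model_freq=10_000,
        buffer=False, buffer_freq=10_000, file=False, tensorboard=False, numpy=False)

    env, test_env = gym.make(args.env_name), gym.make(args.env_name)
    max_action = float(env.action_space.high[0])
    min_action = float(env.action_space.low[0])

    # Hypothetical constructor signature for the algorithm object.
    algorithm = PPO(env.observation_space.shape[0], env.action_space.shape[0], args)

    trainer = On_policy_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()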