def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'seed': flags.seed,
                  # 'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.episode_reward = 0
    self.cnt_success = 0

def build_global_network(self, learning_rate_input):
    environment = Environment.create_environment(flags.env_type, -1)
    state_shape = environment.get_state_shape()
    agents_count = environment.get_situations_count()
    action_size = environment.get_action_size()
    self.global_network = MultiAgentModel(-1,
                                          state_shape,
                                          agents_count,
                                          action_size,
                                          flags.entropy_beta,
                                          self.device)
    return RMSPropApplier(learning_rate=learning_rate_input,
                          decay=flags.rmsp_alpha,
                          momentum=0.0,
                          epsilon=flags.rmsp_epsilon,
                          clip_norm=flags.grad_norm_clip,
                          device=self.device)

def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0

def __init__(self, thread_index, global_network, initial_learning_rate,
             learning_rate_input, grad_applier, max_global_time_step, device):
    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size()
    self.local_network = UnrealModel(self.action_size, thread_index, device)
    self.local_network.prepare_loss()
    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    self.sync = self.local_network.sync_from(global_network)
    self.environment = Environment.create_environment()
    self.experience = Experience(EXPERIENCE_HISTORY_SIZE)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0

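# The constructor above follows the standard A3C layout: every worker receives the same
# global network and the same shared grad_applier (an RMSPropApplier like the one
# returned by build_global_network earlier). A minimal wiring sketch of that pattern;
# "Trainer" is an assumed name for the class whose __init__ appears just above, and the
# hyperparameter constants, initial_learning_rate, max_global_time_step, device and
# PARALLEL_SIZE are illustrative, not taken from the source.
import tensorflow as tf

learning_rate_input = tf.placeholder(tf.float32, name='learning_rate')
action_size = Environment.get_action_size()
global_network = UnrealModel(action_size, -1, device)  # thread index -1 marks the global copy
grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                              decay=0.99,
                              momentum=0.0,
                              epsilon=0.1,
                              clip_norm=40.0,
                              device=device)
trainers = [Trainer(i, global_network, initial_learning_rate,
                    learning_rate_input, grad_applier,
                    max_global_time_step, device)
            for i in range(PARALLEL_SIZE)]
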
def __init__(self, thread_index, global_network, initial_learning_rate, env_args,
             use_pixel_change, use_value_replay, use_reward_prediction,
             pixel_change_lambda, entropy_beta, local_t_max, gamma, gamma_pc,
             experience_history_size, max_global_time_step, spatial_dim, optimizor):
    self.thread_index = thread_index
    self.env_args = env_args
    self.use_pixel_change = use_pixel_change
    self.use_value_replay = use_value_replay
    self.use_reward_prediction = use_reward_prediction
    self.local_t_max = local_t_max
    self.gamma = gamma
    self.gamma_pc = gamma_pc
    self.experience_history_size = experience_history_size
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size()
    self.local_network = Agent(thread_index,
                               use_pixel_change,
                               use_value_replay,
                               use_reward_prediction,
                               pixel_change_lambda,
                               entropy_beta)
    self.global_network = global_network
    self.experience = Experience(self.experience_history_size)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    self.spatial_dim = spatial_dim
    self.obs_processer = ObsProcesser()
    self.action_processer = ActionProcesser(dim=spatial_dim)
    self.optimizor = optimizor
    self.distribution = th.distributions.Categorical
    # For log output
    self.prev_local_t = 0
    self.environment = Environment.create_environment(self.env_args)

def prepare(self):
    if self.running:
        self.environment = Environment.create_environment(self.maze_size, self.level_seed)
        print('Started trainer ', self.thread_index)
        self.apply_next_location_loss = 0.0
        sys.stdout.flush()

def __init__(self, model_size, group_id, environment_id=0, training=True):
    self.model_size = model_size
    self._training = training
    self.environment_id = environment_id
    self.group_id = group_id
    # Build environment
    self.environment = Environment.create_environment(flags.env_type,
                                                      self.environment_id,
                                                      self._training)
    self.extrinsic_reward_manipulator = eval(flags.extrinsic_reward_manipulator)
    self.terminal = True
    self._composite_batch = CompositeBatch(maxlen=flags.replay_buffer_size if flags.replay_mean > 0 else 1)
    # Statistics
    self.__client_statistics = Statistics(flags.episode_count_for_evaluation)
    if self._training:
        # Logs
        if not os.path.isdir(flags.log_dir + "/performance"):
            os.mkdir(flags.log_dir + "/performance")
        if not os.path.isdir(flags.log_dir + "/episodes"):
            os.mkdir(flags.log_dir + "/episodes")
        formatter = logging.Formatter('%(asctime)s %(message)s')
        # Reward logger
        self.__reward_logger = logging.getLogger('reward_{}_{}'.format(self.group_id, self.environment_id))
        hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_{}_{}.log'.format(self.group_id, self.environment_id))
        hdlr.setFormatter(formatter)
        self.__reward_logger.addHandler(hdlr)
        self.__reward_logger.setLevel(logging.DEBUG)
        self.__max_reward = float("-inf")

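# The logger wiring in the snippet above (getLogger, FileHandler, Formatter, setLevel)
# recurs almost verbatim in several snippets below; a minimal sketch of a reusable
# helper built only on the standard library. make_file_logger is a hypothetical name,
# not part of the original codebase.
import logging
import os

def make_file_logger(name, path, fmt='%(asctime)s %(message)s'):
    """Return a DEBUG-level logger that appends to the file at `path`."""
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)  # replaces the isdir/mkdir checks above
    logger = logging.getLogger(name)
    if not logger.handlers:  # guard against duplicate handlers on repeated calls
        handler = logging.FileHandler(path)
        handler.setFormatter(logging.Formatter(fmt))
        logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    return logger
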
def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    print('flags:use_pixel_change {}'.format(flags.use_pixel_change))
    sleep(10)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    print('\n======\nENV in Evaluate::ctor')
    print(self.environment)
    print(self.global_network)
    print('val_replay!!! {}'.format(flags.use_value_replay))
    print(flags.split)
    print('=======\n')
    sleep(10)
    self.episode_reward = 0

def __init__(self, thread_index, global_network, initial_learning_rate,
             learning_rate_input, grad_applier, env_type, entropy_beta,
             local_t_max, gamma, max_global_time_step, device):
    self.stats = {}
    self.thread_index = thread_index
    self.global_network = global_network
    self.grad_applier = grad_applier
    # Logs
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    self.info_logger = logging.getLogger('info_' + str(thread_index))
    hdlr = logging.FileHandler(flags.log_dir + '/performance/info_' + str(thread_index) + '.log')
    hdlr.setFormatter(formatter)
    self.info_logger.addHandler(hdlr)
    self.info_logger.setLevel(logging.DEBUG)
    self.reward_logger = logging.getLogger('reward_' + str(thread_index))
    hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_' + str(thread_index) + '.log')
    hdlr.setFormatter(formatter)
    self.reward_logger.addHandler(hdlr)
    self.reward_logger.setLevel(logging.DEBUG)
    self.max_reward = float("-inf")
    # Trainer
    self.learning_rate_input = learning_rate_input
    self.env_type = env_type
    self.local_t_max = local_t_max
    self.gamma = gamma
    self.environment = Environment.create_environment(self.env_type, self.thread_index)
    self.action_size = self.environment.get_action_size()
    state_shape = self.environment.get_state_shape()
    agents_count = self.environment.get_situations_count()
    self.max_global_time_step = max_global_time_step
    self.entropy_beta = entropy_beta
    self.device = device
    # Build network
    self.local_network = MultiAgentModel(self.thread_index,
                                         state_shape,
                                         agents_count,
                                         self.action_size,
                                         self.entropy_beta,
                                         self.device)
    self.apply_gradients = []
    self.sync = []
    for i in range(self.local_network.agent_count):
        local_agent = self.local_network.get_agent(i)
        global_agent = self.global_network.get_agent(i)
        local_agent.prepare_loss()
        self.apply_gradients.append(
            self.grad_applier.minimize_local(local_agent.total_loss,
                                             global_agent.get_vars(),
                                             local_agent.get_vars()))
        self.sync.append(local_agent.sync_from(global_agent))
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    # For log output
    self.prev_local_t = 0

def __init__(self):
    self.env = Environment.create_environment()
    if os.path.exists('human_exp.pkl'):
        # Pickle files must be opened in binary mode.
        with open('human_exp.pkl', 'rb') as f:
            self.ExpPool = pkl.load(f)
    else:
        self.ExpPool = Experience(MAX_EXP)
    pygame.init()
    self.surface = pygame.display.set_mode(DISP_SIZE, 0)
    pygame.display.set_caption('Recorder')

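# The recorder above only loads human_exp.pkl; a minimal sketch of the matching save
# step, assuming the experience pool object is picklable. save_pool is a hypothetical
# helper name, not from the original code.
import pickle as pkl

def save_pool(pool, path='human_exp.pkl'):
    # Binary mode mirrors the 'rb' read above; the highest protocol keeps dumps compact.
    with open(path, 'wb') as f:
        pkl.dump(pool, f, protocol=pkl.HIGHEST_PROTOCOL)
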
def test(self):
    result_file = '{}/test_results_{}.log'.format(flags.log_dir, self.global_step)
    if os.path.exists(result_file):
        print('Test results already produced and evaluated for {}'.format(result_file))
        return
    result_lock = RLock()
    print('Start testing')
    testers = []
    threads = []
    tf_session = tf.get_default_session()
    tmp_environment = Environment.create_environment(env_type=flags.env_type, training=False)
    dataset_size = tmp_environment.get_dataset_size()
    data_per_thread = max(1, dataset_size // self.thread_count)
    for i in range(self.thread_count):  # parallel testing
        tester = Group(group_id=-(i + 1),
                       environment_count=data_per_thread,
                       global_network=self.global_network,
                       training=False)
        data_range_start = i * data_per_thread
        data_range_end = data_range_start + data_per_thread
        # print(data_range_start, data_per_thread, dataset_size)
        thread = Thread(target=self.test_function,
                        args=(result_file, result_lock, tester,
                              (data_range_start, data_range_end), tf_session))
        thread.start()
        threads.append(thread)
        testers.append(tester)
    print('Test Set size:', dataset_size)
    print('Tests per thread:', data_per_thread)
    time.sleep(5)
    for thread in threads:  # wait for all threads to end
        thread.join()
    print('End testing')
    # Get overall statistics
    test_statistics = Statistics(self.thread_count)
    for group in testers:
        test_statistics.add(group.get_statistics())
    info = test_statistics.get()
    # Write results to file
    stats_file = '{}/test_statistics.log'.format(flags.log_dir)
    with open(stats_file, "a", encoding="utf-8") as file:  # write stats to file
        file.write('{}\n'.format(["{}={}".format(key, value)
                                  for key, value in sorted(info.items(), key=lambda t: t[0])]))
    print('Test statistics saved in {}'.format(stats_file))
    print('Test results saved in {}'.format(result_file))
    return tmp_environment.evaluate_test_results(result_file)

def __init__(self):
    self.img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
    self.action_size = Environment.get_action_size()
    self.global_network = UnrealModel(self.action_size, -1, "/cpu:0", for_display=True)
    self.env = Environment.create_environment()
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.ep_reward = 0
    self.mazemap = MazeMap()

def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    name = 'UNREAL' if flags.segnet == 0 else "A3C ErfNet"
    pygame.display.set_caption(name)
    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config.get('height', 88), env_config.get('width', 88)]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training")
    map_file = env_config.get('objecttypes_file', '../../objectTypes.csv')
    self.label_mapping = pd.read_csv(map_file, sep=',', header=0)
    self.get_col_index()
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/gpu:0",
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name, flags.termination_time_sec,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0

def build_global_network(self, learning_rate_input):
    environment = Environment.create_environment(flags.env_type, -1,
                                                 self.training_set, shuffle=False)
    self.global_network = ModelManager(-1, environment, learning_rate_input, self.device)
    # Return gradient optimizer
    return RMSPropApplier(learning_rate=learning_rate_input,
                          decay=flags.rmsp_alpha,
                          momentum=0.0,
                          epsilon=flags.rmsp_epsilon,
                          clip_norm=flags.grad_norm_clip,
                          device=self.device)

def __init__(self, group_id, environment_count, global_network, training=True):
    self.group_id = group_id
    self.training = training
    # Get environment info
    tmp_environment = Environment.create_environment(env_type=flags.env_type, training=training)
    self.environment_info = {
        'state_shape': tmp_environment.get_state_shape(),
        'action_shape': tmp_environment.get_action_shape(),
        'state_scaler': tmp_environment.state_scaler,
        'has_masked_actions': tmp_environment.has_masked_actions(),
    }
    # Build network manager
    self.network_manager = NetworkManager(group_id=self.group_id,
                                          environment_info=self.environment_info,
                                          global_network=global_network,
                                          training=self.training)
    # Build environments
    self.environment_count = environment_count
    self.worker_list = [
        EnvironmentManager(model_size=self.network_manager.model_size,
                           environment_id=env_id,
                           group_id=group_id,
                           training=training)
        for env_id in range(self.environment_count)
    ]
    # State distribution estimator
    self.state_distribution_estimator = [
        RunningMeanStd(batch_size=flags.batch_size, shape=shape)
        for shape in self.environment_info['state_shape']
    ]
    self.network_manager.state_mean = [estimator.mean for estimator in self.state_distribution_estimator]
    self.network_manager.state_std = [estimator.std for estimator in self.state_distribution_estimator]
    ImportantInformation(self.state_distribution_estimator,
                         'state_distribution_estimator{}'.format(self.group_id))
    # Statistics
    self.group_statistics = IndexedStatistics(max_count=self.environment_count,
                                              buffer_must_be_full=True)
    self.has_terminal_worker = False
    self.terminated_episodes = 0

def __init__(self, args, display_size, saver):
    pygame.init()
    self.args = args
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    args.action_size = Environment.get_action_size(args.env_name)
    self.global_network = Agent(1, args)
    saver.restore(self.global_network)
    self.global_network.eval()
    self.environment = Environment.create_environment(args.env_name)
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.distribution = torch.distributions.Categorical
    self.episode_reward = 0

def test_step(self):
    environment = Environment.create_environment()
    action_size = Environment.get_action_size()
    if sys.platform == 'darwin':
        self.assertTrue(action_size == 6)
    else:
        self.assertTrue(action_size == 8)
    for i in range(3):
        self.assertTrue(environment.last_observation.shape == (84, 84))
        if SAVE_IMAGE:
            scipy.misc.imsave("debug_observation{0}.png".format(i),
                              environment.last_observation)
        reward, terminal = environment.step(0)

def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config['height'], env_config['width']]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training")
    # for_display=True marks this UnrealModel instance as display/evaluation-only
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,  # flags.pixel_change_lambda
                                      0.0,  # flags.entropy_beta
                                      device,
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name, flags.termination_time_sec,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.global_network.prepare_loss()
    self.total_loss = []
    self.segm_loss = []
    self.episode_reward = [0]
    self.episode_roomtype = []
    self.roomType_dict = {}
    self.segnet_class_dict = {}
    self.success_rate = []
    self.batch_size = 20
    self.batch_cur_num = 0
    self.batch_prev_num = 0
    self.batch_si = []
    self.batch_sobjT = []
    self.batch_a = []
    self.batch_reward = []

def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    self.action_size = Environment.get_action_size()
    self.global_network = UnrealModel(self.action_size, -1, "/cpu:0", for_display=True)
    self.environment = Environment.create_environment()
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0

def check_environment(self, env_type, env_name):
    env = Environment.create_environment(env_type, env_name, 0)
    action_size = Environment.get_action_size(env_type, env_name)
    for i in range(3):
        state, reward, terminal = env.process(0)
        print(state)
        print(reward)
        print(terminal)
        # # Check shape
        # self.assertTrue(state.shape == (84, 84, 3))
        # # state and pixel_change value range should be [0,1]
        # self.assertTrue(np.amax(state) <= 1.0)
    env.stop()

def run(args, server):
    # Create an environment chosen by the global ENV_TYPE
    env = Environment.create_environment()
    trainer = UNREAL(env, args.task, args.visualise)
    variables_to_save = [v for v in tf.global_variables() if not v.name.startswith('local')]
    init_op = tf.variables_initializer(variables_to_save)
    init_all_op = tf.global_variables_initializer()
    saver = tf.train.Saver(variables_to_save)

    def init_fn(sess):
        logger.info('Initializing all parameters...')
        sess.run(init_all_op)

    config = tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:{}'.format(args.task)])
    logdir = os.path.join(args.log_dir, 'train')
    summary_writer = tf.summary.FileWriter(logdir + '_%d' % args.task)
    logger.info('Event directory: %s_%s', logdir, args.task)
    sv = tf.train.Supervisor(is_chief=(args.task == 0),
                             logdir=logdir,
                             saver=saver,
                             summary_op=None,
                             init_op=init_op,
                             init_fn=init_fn,
                             summary_writer=summary_writer,
                             ready_op=tf.report_uninitialized_variables(variables_to_save),
                             global_step=trainer.global_step,
                             save_model_secs=600,
                             save_summaries_secs=120)
    num_global_steps = MAX_TRAIN_STEP
    logger.info('Starting session...\n'
                'If this hangs, we are most likely waiting to connect to the parameter server.')
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        logger.info('Starting training at step=%d' % global_step)
        while not sv.should_stop() and global_step < num_global_steps:
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)
    sv.stop()
    logger.info('reached %s steps. worker stopped.' % global_step)

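# run(args, server) expects a tf.train.Server from a distributed TF1 cluster; a minimal
# sketch of the wiring, assuming one local parameter server and one worker. The cluster
# addresses and argparse fields are illustrative, and a separate process must run the
# 'ps' job (tf.train.Server(..., job_name='ps').join()) for the session to start.
import argparse
import tensorflow as tf

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--task', type=int, default=0)
    parser.add_argument('--visualise', action='store_true')
    parser.add_argument('--log-dir', dest='log_dir', default='/tmp/unreal')
    args = parser.parse_args()
    cluster = tf.train.ClusterSpec({'ps': ['localhost:2222'],
                                    'worker': ['localhost:2223']})
    server = tf.train.Server(cluster, job_name='worker', task_index=args.task)
    run(args, server)
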
def __init__(self, thread_index, session, global_network, device, training=True):
    self.training = training
    self.thread_index = thread_index
    self.global_network = global_network
    self.device = device
    if self.training:
        # Logs
        if not os.path.isdir(flags.log_dir + "/performance"):
            os.mkdir(flags.log_dir + "/performance")
        if not os.path.isdir(flags.log_dir + "/episodes"):
            os.mkdir(flags.log_dir + "/episodes")
        formatter = logging.Formatter('%(asctime)s %(message)s')
        # Reward logger
        self.reward_logger = logging.getLogger('reward_' + str(thread_index))
        hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_' + str(thread_index) + '.log')
        hdlr.setFormatter(formatter)
        self.reward_logger.addHandler(hdlr)
        self.reward_logger.setLevel(logging.DEBUG)
        self.max_reward = float("-inf")
    # Build network
    self.environment = Environment.create_environment(flags.env_type, self.thread_index, self.training)
    state_shape = self.environment.get_state_shape()
    action_shape = self.environment.get_action_shape()
    concat_size = self.environment.get_concatenation_size() if flags.use_concatenation else 0
    self.local_network = eval(self.get_model_manager())(session=session,
                                                        device=self.device,
                                                        id=self.thread_index,
                                                        action_shape=action_shape,
                                                        concat_size=concat_size,
                                                        state_shape=state_shape,
                                                        global_network=self.global_network,
                                                        training=self.training)
    self.terminal = True
    self.local_t = 0
    self.prev_local_t = 0
    self.terminated_episodes = 0
    self.stats = {}

def __init__(self, display_size, model):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('MAPREADER')
    self.action_size = Environment.get_action_size()
    self.global_network = model
    self.environment = Environment.create_environment(*DISPLAY_LEVEL)
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.step_count = 0
    self.episode_reward = 0
    self.episode_intrinsic_reward = 0
    self.state = self.environment.last_state
    self.replan = True
    self.path = []
    self.maze_size = DISPLAY_LEVEL[0] // 40 * 2 + 7

def test_process(self):
    environment = Environment.create_environment()
    action_size = Environment.get_action_size()
    for i in range(3):
        state, reward, terminal, pixel_change = environment.process(0)
        # Check shape
        self.assertTrue(state.shape == (84, 84, 3))
        self.assertTrue(environment.last_state.shape == (84, 84, 3))
        self.assertTrue(pixel_change.shape == (20, 20))
        # state and pixel_change value range should be [0,1]
        self.assertTrue(np.amax(state) <= 1.0)
        self.assertTrue(np.amin(state) >= 0.0)
        self.assertTrue(np.amax(pixel_change) <= 1.0)
        self.assertTrue(np.amin(pixel_change) >= 0.0)

def __init__(self, configs):
    checkpoint = tf.train.get_checkpoint_state(flags.checkpoint_dir)
    if not checkpoint or not checkpoint.model_checkpoint_path:
        raise FileNotFoundError("a checkpoint is required, but none was found")
    os.makedirs(flags.log_dir, exist_ok=True)
    app = Application()
    app.sess = tf.Session()
    app.device = '/cpu:0'
    app.build_global_network(tf.placeholder(tf.float64))
    app.trainers = []
    app.load_checkpoint()
    self.app = app
    self.model = app.global_network
    self.environment = Environment.create_environment(flags.env_type, -1)
    self.episode_index = 1
    super().__init__(configs)

def check_environment(self, env_type, env_name):
    environment = Environment.create_environment(env_type, env_name)
    # action_size = Environment.get_action_size(env_type, env_name)  # Not used
    for i in range(3):
        state, reward, terminal, pixel_change = environment.process(0)
        # Check shape
        self.assertTrue(state.shape == (84, 84, 3))
        self.assertTrue(environment.last_state.shape == (84, 84, 3))
        self.assertTrue(pixel_change.shape == (20, 20))
        # state and pixel_change value range should be [0,1]
        self.assertTrue(np.amax(state) <= 1.0)
        self.assertTrue(np.amin(state) >= 0.0)
        self.assertTrue(np.amax(pixel_change) <= 1.0)
        self.assertTrue(np.amin(pixel_change) >= 0.0)
    environment.stop()

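# check_environment above is a helper rather than an auto-discovered test (unittest only
# collects methods whose names start with test_); a minimal sketch of driving it from a
# test method, assuming check_environment is defined on the same TestCase as above. The
# class name and the (env_type, env_name) pairs are hypothetical placeholders.
import unittest

class EnvironmentTest(unittest.TestCase):
    # check_environment(self, env_type, env_name) defined here, as above.

    def test_all_environments(self):
        # Exercise each configured environment through the shared helper.
        for env_type, env_name in [('lab', 'nav_maze_static_01'),
                                   ('gym', 'PongDeterministic-v4')]:
            self.check_environment(env_type, env_name)

if __name__ == '__main__':
    unittest.main()
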
def test_random_step(self):
    environment = Environment.create_environment()
    for i in range(3):
        observation = environment.random_step()
        self.assertTrue(observation.shape == (84, 84))

def prepare(self):
    self.environment = Environment.create_environment(self.env_type, self.env_name)

def prepare(self):
    self.environment = Environment.create_environment()

def prepare(self, termination_time=50.0, termination_dist_value=-10.0):
    self.environment = Environment.create_environment(self.env_type,
                                                      self.env_name,
                                                      self.termination_time,
                                                      thread_index=self.thread_index)