Example #1
 def __init__(self):
     self.action_size = Environment.get_action_size(flags.env_type,
                                                    flags.env_name)
     self.objective_size = Environment.get_objective_size(
         flags.env_type, flags.env_name)
     self.global_network = UnrealModel(self.action_size,
                                       self.objective_size,
                                       -1,
                                       flags.use_lstm,
                                       flags.use_pixel_change,
                                       flags.use_value_replay,
                                       flags.use_reward_prediction,
                                       0.0,
                                       0.0,
                                       "/cpu:0",
                                       for_display=True)
     self.environment = Environment.create_environment(
         flags.env_type,
         flags.env_name,
         env_args={
             'episode_schedule': flags.split,
             'log_action_trace': flags.log_action_trace,
             'seed': flags.seed,
             # 'max_states_per_scene': flags.episodes_per_scene,
             'episodes_per_scene_test': flags.episodes_per_scene
         })
     self.episode_reward = 0
     self.cnt_success = 0
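Note: every example on this page goes through the same static Environment factory. A minimal sketch of the interface they appear to assume follows; this stub is illustrative only, the concrete Environment class lives in each repository and its exact signatures vary between examples.

class Environment(object):
    """Illustrative stub of the factory interface used throughout these examples."""

    @staticmethod
    def get_action_size(env_type=None, env_name=None):
        # Number of discrete actions exposed by the selected environment.
        raise NotImplementedError

    @staticmethod
    def get_objective_size(env_type=None, env_name=None):
        # Size of the objective/instruction vector, when the task defines one.
        raise NotImplementedError

    @staticmethod
    def create_environment(env_type=None, env_name=None, *args, env_args=None, **kwargs):
        # Builds and returns a concrete environment wrapper for one worker or display.
        raise NotImplementedError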
Example #2
	def build_global_network(self, learning_rate_input):
		environment = Environment.create_environment(flags.env_type, -1)
		state_shape = environment.get_state_shape()
		agents_count = environment.get_situations_count()
		action_size = environment.get_action_size()
		self.global_network = MultiAgentModel( -1, state_shape, agents_count, action_size, flags.entropy_beta, self.device )
		return RMSPropApplier(learning_rate = learning_rate_input, decay = flags.rmsp_alpha, momentum = 0.0, epsilon = flags.rmsp_epsilon, clip_norm = flags.grad_norm_clip, device = self.device)
Example #3
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        pygame.display.set_caption('UNREAL')

        self.action_size = Environment.get_action_size(flags.env_type,
                                                       flags.env_name)
        self.objective_size = Environment.get_objective_size(
            flags.env_type, flags.env_name)
        self.global_network = UnrealModel(self.action_size,
                                          self.objective_size,
                                          -1,
                                          flags.use_lstm,
                                          flags.use_pixel_change,
                                          flags.use_value_replay,
                                          flags.use_reward_prediction,
                                          0.0,
                                          0.0,
                                          "/cpu:0",
                                          for_display=True)
        self.environment = Environment.create_environment(
            flags.env_type,
            flags.env_name,
            env_args={
                'episode_schedule': flags.split,
                'log_action_trace': flags.log_action_trace,
                'max_states_per_scene': flags.episodes_per_scene,
                'episodes_per_scene_test': flags.episodes_per_scene
            })
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
Example #4
  def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    self.action_size = Environment.get_action_size()
    self.local_network = UnrealModel(self.action_size, thread_index, device)
    self.local_network.prepare_loss()

    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    
    self.sync = self.local_network.sync_from(global_network)
    self.environment = Environment.create_environment()
    self.experience = Experience(EXPERIENCE_HISTORY_SIZE)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0
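For orientation, a minimal sketch of how the sync and apply_gradients ops built in this constructor are typically driven from the worker loop; the helper name, the session handling, and the elided rollout step are assumptions, not code from this example.

import tensorflow as tf

def run_worker_update(sess, trainer, learning_rate, feed_dict=None):
    """Hypothetical helper: one A3C/UNREAL-style update using the trainer's ops."""
    # Copy the latest global weights into the local network.
    sess.run(trainer.sync)
    # ... roll out up to local_t_max steps here, filling trainer.experience ...
    # Apply the locally computed gradients to the global network; the loss
    # placeholders of the local network would also have to be fed here.
    feeds = dict(feed_dict or {})
    feeds[trainer.learning_rate_input] = learning_rate
    sess.run(trainer.apply_gradients, feed_dict=feeds)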
Example #5
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 env_args, use_pixel_change, use_value_replay,
                 use_reward_prediction, pixel_change_lambda, entropy_beta,
                 local_t_max, gamma, gamma_pc, experience_history_size,
                 max_global_time_step, spatial_dim, optimizor):

        self.thread_index = thread_index
        self.env_args = env_args
        self.use_pixel_change = use_pixel_change
        self.use_value_replay = use_value_replay
        self.use_reward_prediction = use_reward_prediction
        self.local_t_max = local_t_max
        self.gamma = gamma
        self.gamma_pc = gamma_pc
        self.experience_history_size = experience_history_size
        self.max_global_time_step = max_global_time_step
        self.action_size = Environment.get_action_size()
        self.local_network = Agent(thread_index, use_pixel_change,
                                   use_value_replay, use_reward_prediction,
                                   pixel_change_lambda, entropy_beta)

        self.global_network = global_network
        self.experience = Experience(self.experience_history_size)
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate
        self.episode_reward = 0
        self.spatial_dim = spatial_dim
        self.obs_processer = ObsProcesser()
        self.action_processer = ActionProcesser(dim=spatial_dim)
        self.optimizor = optimizor
        self.distribution = th.distributions.Categorical
        # For log output
        self.prev_local_t = 0
        self.environment = Environment.create_environment(self.env_args)
Example #6
 def prepare(self):
     if self.running:
         self.environment = Environment.create_environment(
             self.maze_size, self.level_seed)
         print('Started trainer ', self.thread_index)
         self.apply_next_location_loss = 0.0
         sys.stdout.flush()
Example #7
	def __init__(self, model_size, group_id, environment_id=0, training=True):
		self.model_size = model_size
		self._training = training
		self.environment_id = environment_id
		self.group_id = group_id
		# Build environment
		self.environment = Environment.create_environment(flags.env_type, self.environment_id, self._training)
		self.extrinsic_reward_manipulator = eval(flags.extrinsic_reward_manipulator)
		self.terminal = True
		self._composite_batch = CompositeBatch(maxlen=flags.replay_buffer_size if flags.replay_mean > 0 else 1)
		# Statistics
		self.__client_statistics = Statistics(flags.episode_count_for_evaluation)
		if self._training:
			#logs
			if not os.path.isdir(flags.log_dir + "/performance"):
				os.mkdir(flags.log_dir + "/performance")
			if not os.path.isdir(flags.log_dir + "/episodes"):
				os.mkdir(flags.log_dir + "/episodes")
			formatter = logging.Formatter('%(asctime)s %(message)s')
			# reward logger
			self.__reward_logger = logging.getLogger('reward_{}_{}'.format(self.group_id, self.environment_id))
			hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_{}_{}.log'.format(self.group_id, self.environment_id))
			hdlr.setFormatter(formatter)
			self.__reward_logger.addHandler(hdlr) 
			self.__reward_logger.setLevel(logging.DEBUG)
			self.__max_reward = float("-inf")
Example #8
 def __init__(self):
     self.action_size = Environment.get_action_size(flags.env_type,
                                                    flags.env_name)
     self.objective_size = Environment.get_objective_size(
         flags.env_type, flags.env_name)
     print('flags:use_pixel_change {}'.format(flags.use_pixel_change))
     sleep(10)
     self.global_network = UnrealModel(self.action_size,
                                       self.objective_size,
                                       -1,
                                       flags.use_lstm,
                                       flags.use_pixel_change,
                                       flags.use_value_replay,
                                       flags.use_reward_prediction,
                                       0.0,
                                       0.0,
                                       "/cpu:0",
                                       for_display=True)
     self.environment = Environment.create_environment(
         flags.env_type,
         flags.env_name,
         env_args={
             'episode_schedule': flags.split,
             'log_action_trace': flags.log_action_trace,
             'max_states_per_scene': flags.episodes_per_scene,
             'episodes_per_scene_test': flags.episodes_per_scene
         })
     print('\n======\nENV in Evaluate::ctor')
     print(self.environment)
     print(self.global_network)
     print('val_replay!!! {}'.format(flags.use_value_replay))
     print(flags.split)
     print('=======\n')
     sleep(10)
     self.episode_reward = 0
Example #9
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 learning_rate_input, grad_applier, env_type, entropy_beta,
                 local_t_max, gamma, max_global_time_step, device):
        self.stats = {}
        self.thread_index = thread_index
        self.global_network = global_network
        self.grad_applier = grad_applier
        #logs
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')

        self.info_logger = logging.getLogger('info_' + str(thread_index))
        hdlr = logging.FileHandler(flags.log_dir + '/performance/info_' +
                                   str(thread_index) + '.log')
        hdlr.setFormatter(formatter)
        self.info_logger.addHandler(hdlr)
        self.info_logger.setLevel(logging.DEBUG)

        self.reward_logger = logging.getLogger('reward_' + str(thread_index))
        hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_' +
                                   str(thread_index) + '.log')
        hdlr.setFormatter(formatter)
        self.reward_logger.addHandler(hdlr)
        self.reward_logger.setLevel(logging.DEBUG)

        self.max_reward = float("-inf")
        #trainer
        self.learning_rate_input = learning_rate_input
        self.env_type = env_type
        self.local_t_max = local_t_max
        self.gamma = gamma
        self.environment = Environment.create_environment(
            self.env_type, self.thread_index)
        self.action_size = self.environment.get_action_size()
        state_shape = self.environment.get_state_shape()
        agents_count = self.environment.get_situations_count()
        self.max_global_time_step = max_global_time_step
        self.entropy_beta = entropy_beta
        self.device = device
        # build network
        self.local_network = MultiAgentModel(self.thread_index, state_shape,
                                             agents_count, self.action_size,
                                             self.entropy_beta, self.device)
        self.apply_gradients = []
        self.sync = []
        for i in range(self.local_network.agent_count):
            local_agent = self.local_network.get_agent(i)
            global_agent = self.global_network.get_agent(i)
            local_agent.prepare_loss()
            self.apply_gradients.append(
                self.grad_applier.minimize_local(local_agent.total_loss,
                                                 global_agent.get_vars(),
                                                 local_agent.get_vars()))
            self.sync.append(local_agent.sync_from(global_agent))
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate
        # For log output
        self.prev_local_t = 0
Example #10
 def __init__(self):
     self.env = Environment.create_environment()
     if os.path.exists('human_exp.pkl'):
         with open('human_exp.pkl', 'rb') as f:  # pickle files must be opened in binary mode
             self.ExpPool = pkl.load(f)
     else:
         self.ExpPool = Experience(MAX_EXP)
     pygame.init()
     self.surface = pygame.display.set_mode(DISP_SIZE, 0)
     pygame.display.set_caption('Recorder')
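The recorder above loads a previously saved experience pool; a matching save helper is not shown. A minimal sketch of one follows (the function name and path are hypothetical):

import pickle as pkl

def save_experience_pool(pool, path='human_exp.pkl'):
    # pickle data must be written (and read) in binary mode
    with open(path, 'wb') as f:
        pkl.dump(pool, f)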
Example #11
    def test(self):
        result_file = '{}/test_results_{}.log'.format(flags.log_dir,
                                                      self.global_step)
        if os.path.exists(result_file):
            print('Test results already produced and evaluated for {}'.format(
                result_file))
            return
        result_lock = RLock()

        print('Start testing')
        testers = []
        threads = []
        tf_session = tf.get_default_session()
        tmp_environment = Environment.create_environment(
            env_type=flags.env_type, training=False)
        dataset_size = tmp_environment.get_dataset_size()
        data_per_thread = max(1, dataset_size // self.thread_count)
        for i in range(self.thread_count):  # parallel testing
            tester = Group(group_id=-(i + 1),
                           environment_count=data_per_thread,
                           global_network=self.global_network,
                           training=False)
            data_range_start = i * data_per_thread
            data_range_end = data_range_start + data_per_thread
            # print(data_range_start, data_per_thread, dataset_size)
            thread = Thread(target=self.test_function,
                            args=(result_file, result_lock, tester,
                                  (data_range_start,
                                   data_range_end), tf_session))
            thread.start()
            threads.append(thread)
            testers.append(tester)
        print('Test Set size:', dataset_size)
        print('Tests per thread:', data_per_thread)
        time.sleep(5)
        for thread in threads:  # wait for all threads to end
            thread.join()
        print('End testing')
        # get overall statistics
        test_statistics = Statistics(self.thread_count)
        for group in testers:
            test_statistics.add(group.get_statistics())
        info = test_statistics.get()
        # write results to file
        stats_file = '{}/test_statistics.log'.format(flags.log_dir)
        with open(stats_file, "a",
                  encoding="utf-8") as file:  # write stats to file
            file.write('{}\n'.format([
                "{}={}".format(key, value)
                for key, value in sorted(info.items(), key=lambda t: t[0])
            ]))
        print('Test statistics saved in {}'.format(stats_file))
        print('Test results saved in {}'.format(result_file))
        return tmp_environment.evaluate_test_results(result_file)
Example #12
 def __init__(self):
     self.img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
     self.action_size = Environment.get_action_size()
     self.global_network = UnrealModel(self.action_size,
                                       -1,
                                       "/cpu:0",
                                       for_display=True)
     self.env = Environment.create_environment()
     self.value_history = ValueHistory()
     self.state_history = StateHistory()
     self.ep_reward = 0
     self.mazemap = MazeMap()
Example #13
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        name = 'UNREAL' if flags.segnet == 0 else "A3C ErfNet"
        pygame.display.set_caption(name)

        env_config = sim_config.get(flags.env_name)
        self.image_shape = [
            env_config.get('height', 88),
            env_config.get('width', 88)
        ]
        segnet_param_dict = {'segnet_mode': flags.segnet}
        is_training = tf.placeholder(tf.bool, name="training")
        map_file = env_config.get('objecttypes_file', '../../objectTypes.csv')
        self.label_mapping = pd.read_csv(map_file, sep=',', header=0)
        self.get_col_index()

        self.action_size = Environment.get_action_size(flags.env_type,
                                                       flags.env_name)
        self.objective_size = Environment.get_objective_size(
            flags.env_type, flags.env_name)
        self.global_network = UnrealModel(self.action_size,
                                          self.objective_size,
                                          -1,
                                          flags.use_lstm,
                                          flags.use_pixel_change,
                                          flags.use_value_replay,
                                          flags.use_reward_prediction,
                                          0.0,
                                          0.0,
                                          "/gpu:0",
                                          segnet_param_dict=segnet_param_dict,
                                          image_shape=self.image_shape,
                                          is_training=is_training,
                                          n_classes=flags.n_classes,
                                          segnet_lambda=flags.segnet_lambda,
                                          dropout=flags.dropout,
                                          for_display=True)
        self.environment = Environment.create_environment(
            flags.env_type,
            flags.env_name,
            flags.termination_time_sec,
            env_args={
                'episode_schedule': flags.split,
                'log_action_trace': flags.log_action_trace,
                'max_states_per_scene': flags.episodes_per_scene,
                'episodes_per_scene_test': flags.episodes_per_scene
            })
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
Example #14
 def build_global_network(self, learning_rate_input):
     environment = Environment.create_environment(flags.env_type,
                                                  -1,
                                                  self.training_set,
                                                  shuffle=False)
     self.global_network = ModelManager(-1, environment,
                                        learning_rate_input, self.device)
     # return gradient optimizer
     return RMSPropApplier(learning_rate=learning_rate_input,
                           decay=flags.rmsp_alpha,
                           momentum=0.0,
                           epsilon=flags.rmsp_epsilon,
                           clip_norm=flags.grad_norm_clip,
                           device=self.device)
Example #15
 def __init__(self,
              group_id,
              environment_count,
              global_network,
              training=True):
     self.group_id = group_id
     self.training = training
     # Get environment info
     tmp_environment = Environment.create_environment(
         env_type=flags.env_type, training=training)
     self.environment_info = {
         'state_shape': tmp_environment.get_state_shape(),
         'action_shape': tmp_environment.get_action_shape(),
         'state_scaler': tmp_environment.state_scaler,
         'has_masked_actions': tmp_environment.has_masked_actions(),
     }
     # Build network_manager
     self.network_manager = NetworkManager(
         group_id=self.group_id,
         environment_info=self.environment_info,
         global_network=global_network,
         training=self.training)
     # Build environments
     self.environment_count = environment_count
     self.worker_list = [
         EnvironmentManager(model_size=self.network_manager.model_size,
                            environment_id=env_id,
                            group_id=group_id,
                            training=training)
         for env_id in range(self.environment_count)
     ]
     # State distribution estimator
     self.state_distribution_estimator = [
         RunningMeanStd(batch_size=flags.batch_size, shape=shape)
         for shape in self.environment_info['state_shape']
     ]
     self.network_manager.state_mean = [
         estimator.mean for estimator in self.state_distribution_estimator
     ]
     self.network_manager.state_std = [
         estimator.std for estimator in self.state_distribution_estimator
     ]
     ImportantInformation(
         self.state_distribution_estimator,
         'state_distribution_estimator{}'.format(self.group_id))
     # Statistics
     self.group_statistics = IndexedStatistics(
         max_count=self.environment_count, buffer_must_be_full=True)
     self.has_terminal_worker = False
     self.terminated_episodes = 0
Example #16
 def __init__(self, args, display_size, saver):
     pygame.init()
     self.args = args
     self.surface = pygame.display.set_mode(display_size, 0, 24)
     pygame.display.set_caption('UNREAL')
     args.action_size = Environment.get_action_size(args.env_name)
     self.global_network = Agent(1, args)
     saver.restore(self.global_network)
     self.global_network.eval()
     self.environment = Environment.create_environment(args.env_name)
     self.font = pygame.font.SysFont(None, 20)
     self.value_history = ValueHistory()
     self.state_history = StateHistory()
     self.distribution = torch.distributions.Categorical
     self.episode_reward = 0
Example #17
    def test_step(self):
        environment = Environment.create_environment()
        action_size = Environment.get_action_size()

        if sys.platform == 'darwin':
            self.assertTrue(action_size == 6)
        else:
            self.assertTrue(action_size == 8)

        for i in range(3):
            self.assertTrue(environment.last_observation.shape == (84, 84))
            if SAVE_IMAGE:
                scipy.misc.imsave("debug_observation{0}.png".format(i),
                                  environment.last_observation)
            reward, terminal = environment.step(0)
Example #18
  def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)

    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config['height'], env_config['width']]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training")  # its value is set by the for_display param in UnrealModel

    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0, #flags.pixel_change_lambda
                                      0.0, #flags.entropy_beta
                                      device,
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(flags.env_type, flags.env_name, flags.termination_time_sec,
                                                      env_args={'episode_schedule': flags.split,
                                                                'log_action_trace': flags.log_action_trace,
                                                                'max_states_per_scene': flags.episodes_per_scene,
                                                                'episodes_per_scene_test': flags.episodes_per_scene})

    self.global_network.prepare_loss()

    self.total_loss = []
    self.segm_loss = []
    self.episode_reward = [0]
    self.episode_roomtype = []
    self.roomType_dict  = {}
    self.segnet_class_dict = {}
    self.success_rate = []
    self.batch_size = 20
    self.batch_cur_num = 0
    self.batch_prev_num = 0
    self.batch_si = []
    self.batch_sobjT = []
    self.batch_a = []
    self.batch_reward = []
Example #19
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        pygame.display.set_caption('UNREAL')

        self.action_size = Environment.get_action_size()
        self.global_network = UnrealModel(self.action_size,
                                          -1,
                                          "/cpu:0",
                                          for_display=True)
        self.environment = Environment.create_environment()
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
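Examples #3, #13, #16 and #19 all build the same kind of pygame display object; the frame loop that drives it is not part of the snippets. A rough sketch of such a loop, with the per-frame update method left hypothetical:

import pygame

def run_display(display, fps=15):
    clock = pygame.time.Clock()
    running = True
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
        # display.update()  # hypothetical per-frame step/draw call
        pygame.display.update()
        clock.tick(fps)
    pygame.quit()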
Example #20
    def check_environment(self, env_type, env_name):
        env = Environment.create_environment(env_type, env_name, 0)
        action_size = Environment.get_action_size(env_type, env_name)

        for i in range(3):
            state, reward, terminal = env.process(0)

            print(state)
            print(reward)
            print(terminal)
            # # Check shape
            # self.assertTrue(state.shape == (84, 84, 3))
            # # state and pixel_change value range should be [0,1]
            # self.assertTrue(np.amax(state) <= 1.0)

        env.stop()
Example #21
def run(args, server):
  # create an environment chosen by the global ENV_TYPE
  env = Environment.create_environment()
  trainer = UNREAL(env, args.task, args.visualise)

  variables_to_save = [v for v in tf.global_variables() if not v.name.startswith('local')]
  init_op = tf.variables_initializer(variables_to_save)
  init_all_op = tf.global_variables_initializer()
  saver = tf.train.Saver(variables_to_save)

  def init_fn(sess):
    logger.info('Initializing all parameters...')
    sess.run(init_all_op)

  config = tf.ConfigProto(device_filters=['/job:ps', '/job:worker/task:{}'.format(args.task)])
  logdir = os.path.join(args.log_dir, 'train')
  summary_writer = tf.summary.FileWriter(logdir + '_%d' % args.task)
  logger.info('Event directory: %s_%s', logdir, args.task)
  
  sv = tf.train.Supervisor(is_chief = (args.task == 0),
                           logdir = logdir,
                           saver = saver,
                           summary_op = None,
                           init_op = init_op,
                           init_fn = init_fn,
                           summary_writer=summary_writer,
                           ready_op = tf.report_uninitialized_variables(variables_to_save),
                           global_step = trainer.global_step,
                           save_model_secs=600,
                           save_summaries_secs=120)
  
  num_global_steps = MAX_TRAIN_STEP

  logger.info(
    'Starting session...\n' + 'If this hangs, we are most likely waiting to ' +
    'connect to the parameter server.'
  )
  with sv.managed_session(server.target, config=config) as sess:
    sess.as_default()
    trainer.start(sess, summary_writer)
    global_step = sess.run(trainer.global_step)
    logger.info('Starting training at step=%d'%global_step)
    while not sv.should_stop() and global_step < num_global_steps:
      trainer.process(sess)
      global_step = sess.run(trainer.global_step)
    sv.stop()
    logger.info('reached %s steps. worker stopped.' % global_step)
Example #22
 def __init__(self,
              thread_index,
              session,
              global_network,
              device,
              training=True):
     self.training = training
     self.thread_index = thread_index
     self.global_network = global_network
     self.device = device
     if self.training:
         #logs
         if not os.path.isdir(flags.log_dir + "/performance"):
             os.mkdir(flags.log_dir + "/performance")
         if not os.path.isdir(flags.log_dir + "/episodes"):
             os.mkdir(flags.log_dir + "/episodes")
         formatter = logging.Formatter('%(asctime)s %(message)s')
         # reward logger
         self.reward_logger = logging.getLogger('reward_' +
                                                str(thread_index))
         hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_' +
                                    str(thread_index) + '.log')
         hdlr.setFormatter(formatter)
         self.reward_logger.addHandler(hdlr)
         self.reward_logger.setLevel(logging.DEBUG)
         self.max_reward = float("-inf")
     # build network
     self.environment = Environment.create_environment(
         flags.env_type, self.thread_index, self.training)
     state_shape = self.environment.get_state_shape()
     action_shape = self.environment.get_action_shape()
     concat_size = (self.environment.get_concatenation_size()
                    if flags.use_concatenation else 0)
     self.local_network = eval(self.get_model_manager())(
         session=session,
         device=self.device,
         id=self.thread_index,
         action_shape=action_shape,
         concat_size=concat_size,
         state_shape=state_shape,
         global_network=self.global_network,
         training=self.training)
     self.terminal = True
     self.local_t = 0
     self.prev_local_t = 0
     self.terminated_episodes = 0
     self.stats = {}
Example #23
  def __init__(self, display_size,model):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('MAPREADER')

    self.action_size = Environment.get_action_size()
    self.global_network = model
    self.environment = Environment.create_environment(*DISPLAY_LEVEL)
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.step_count = 0
    self.episode_reward = 0
    self.episode_intrinsic_reward = 0
    self.state = self.environment.last_state
    self.replan = True
    self.path = []
    self.maze_size = DISPLAY_LEVEL[0]//40*2+7
Example #24
    def test_process(self):
        environment = Environment.create_environment()
        action_size = Environment.get_action_size()

        for i in range(3):
            state, reward, terminal, pixel_change = environment.process(0)

            # Check shape
            self.assertTrue(state.shape == (84, 84, 3))
            self.assertTrue(environment.last_state.shape == (84, 84, 3))
            self.assertTrue(pixel_change.shape == (20, 20))

            # state and pixel_change value range should be [0,1]
            self.assertTrue(np.amax(state) <= 1.0)
            self.assertTrue(np.amin(state) >= 0.0)
            self.assertTrue(np.amax(pixel_change) <= 1.0)
            self.assertTrue(np.amin(pixel_change) >= 0.0)
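The test above pins down the process() contract: 84x84x3 states and 20x20 pixel-change maps, both scaled to [0, 1]. A minimal random-rollout sketch built on that contract; the reset() call is an assumption about the environment API, not something the test shows.

import numpy as np

def random_rollout(steps=10):
    environment = Environment.create_environment()
    action_size = Environment.get_action_size()
    total_reward = 0.0
    for _ in range(steps):
        action = np.random.randint(action_size)
        state, reward, terminal, pixel_change = environment.process(action)
        total_reward += reward
        if terminal:
            environment.reset()  # hypothetical; the real reset hook may differ
    return total_reward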
Example #25
    def __init__(self, configs):
        checkpoint = tf.train.get_checkpoint_state(flags.checkpoint_dir)
        if not checkpoint or not checkpoint.model_checkpoint_path:
            raise FileNotFoundError("a checkpoint is required, but none was found")
        os.makedirs(flags.log_dir, exist_ok=True)
        app = Application()
        app.sess = tf.Session()
        app.device = '/cpu:0'
        app.build_global_network(tf.placeholder(tf.float64))
        app.trainers = []
        app.load_checkpoint()
        self.app = app
        self.model = app.global_network
        self.environment = Environment.create_environment(flags.env_type, -1)

        self.episode_index = 1

        super().__init__(configs)
Example #26
    def check_environment(self, env_type, env_name):
        environment = Environment.create_environment(env_type, env_name)
        # action_size = Environment.get_action_size(env_type, env_name) # Not used

        for i in range(3):
            state, reward, terminal, pixel_change = environment.process(0)

            # Check shape
            self.assertTrue(state.shape == (84, 84, 3))
            self.assertTrue(environment.last_state.shape == (84, 84, 3))
            self.assertTrue(pixel_change.shape == (20, 20))

            # state and pixel_change value range should be [0,1]
            self.assertTrue(np.amax(state) <= 1.0)
            self.assertTrue(np.amin(state) >= 0.0)
            self.assertTrue(np.amax(pixel_change) <= 1.0)
            self.assertTrue(np.amin(pixel_change) >= 0.0)

        environment.stop()
Example #27
    def test_random_step(self):
        environment = Environment.create_environment()

        for i in range(3):
            observation = environment.random_step()
            self.assertTrue(observation.shape == (84, 84))
Example #28
 def prepare(self):
     self.environment = Environment.create_environment(
         self.env_type, self.env_name)
Example #29
 def prepare(self):
     self.environment = Environment.create_environment()
Example #30
 def prepare(self, termination_time=50.0, termination_dist_value=-10.0):
     self.environment = Environment.create_environment(
         self.env_type,
         self.env_name,
         self.termination_time,
         thread_index=self.thread_index)