Example #1
    def load_model(self, controller_weights=None):
        p = self.params
        self.action_utils = ActionUtils(p['env_name'])
        self.action_size = self.action_utils.action_size()

        self.vae = VAE()
        sys.stdout = open(os.devnull, 'w')
        self.vae.make_vae_shape(p['img_size'], p['img_size'], p['latent_size'])
        sys.stdout = sys.__stdout__
        self.vae.load_model('../' + p['vae_hps']['weights_path'])

        # TODO: Make MDN just take in all of params.
        mdn_hps = p['mdn_hps']
        mdn_hps['max_seq_len'] = p['max_seq_len']
        mdn_hps['in_width'] = p['latent_size'] + self.action_size
        mdn_hps['out_width'] = p['latent_size']
        mdn_hps['action_size'] = self.action_size
        mdn_hps['rnn_size'] = p['hidden_size']
        mdn_hps['batch_size'] = 1
        mdn_hps['max_seq_len'] = 1
        mdn_hps['use_recurrent_dropout'] = 0
        mdn_hps['training'] = 0
        # self.mdn_rnn = MDNRNN(mdn_hps)
        # hps_inf = MDNRNN.set_hps_to_inference(hps)
        self.mdn_rnn = MDNRNN(mdn_hps)
        self.mdn_rnn.load('../' + p['mdn_hps']['weights_path'])

        self.controller = ControllerModel(
            [p['latent_size'] + p['hidden_size'], self.action_size])
        if controller_weights:
            self.controller.load_weights(controller_weights)
Example #2
def main():
    # Load the HyperParameters
    params = json.load(open(CONFIG_PATH))[0]
    utils = ActionUtils(params['env_name'])
    action_size = utils.action_size()
    params['action_size'] = action_size
    mdn_hps = params['mdn_hps']
    mdn_hps['max_seq_len'] = params['max_seq_len']
    mdn_hps['in_width'] = params['latent_size'] + action_size
    mdn_hps['out_width'] = params['latent_size']
    mdn_hps['action_size'] = action_size
    mdn_hps['rnn_size'] = params['hidden_size']
    mdn_hps = MDNRNN.set_hps_to_inference(mdn_hps)

    # Create the MDN and load the params
    mdnrnn = MDNRNN(mdn_hps)
    mdnrnn.load(MDNRNN_PATH)

    # Create the VAE
    vae = VAE()
    vae.make_vae_shape(params['img_size'], params['img_size'],
                       params['latent_size'])
    vae.load_model(VAE_PATH)

    # Create the Gym Env
    env = gym.make(params['env_name'])

    dream_vis(env, mdnrnn, vae, params, mdn_hps, "dream_1")
    dream_vis(env, mdnrnn, vae, params, mdn_hps, "dream_2")
    dream_vis(env, mdnrnn, vae, params, mdn_hps, "dream_3")

    frame_vis(env, mdnrnn, vae, params, mdn_hps, "cmp_1")
    frame_vis(env, mdnrnn, vae, params, mdn_hps, "cmp_2")
Example #3
	def __init__(self):
		super(TableQLearnAgent, self).__init__()
		self.utils = ActionUtils()
		self.qlearn = QLearningTable(actions=list(range(len(high_actions))))
		self.output_file = 'model/table_q_learn_agent.csv'
		self.output_actions_prefix = 'games/game_actions_%d.csv'
		self.output_game_outcome_file = 'game_state/game_outcome.csv'
		
		if os.path.exists(self.output_game_outcome_file):
			self.game_outcomes = pd.read_csv(self.output_game_outcome_file).to_dict('records')
		else:
			self.game_outcomes = []

		self.game_count = len([f for f in os.listdir('games') if os.path.splitext(f)[1] == ".csv"])
		if os.path.isfile(self.output_file + '.gz'):
			self.qlearn.q_table = pd.read_pickle(self.output_file + '.gz', compression='gzip')
Example #4
    def callback(self, message):
        action = ActionUtils.decode_action_from_json(message.data)
        try:
            if self.is_view_action(action):
                # Record the ids of view-action messages under the lock.
                self._view_lock.acquire()
                self._view_ids.add(message.message_id)
                self._view_lock.release()
            message.ack()
        except Exception as e:
            print("ERROR " + str(e))
Example #5
    def callback(self, message):
        # Acquiring and immediately releasing the lock blocks until any
        # in-progress update of the view ids has finished.
        self._view_lock.acquire()
        self._view_lock.release()
        action = ActionUtils.decode_action_from_json(message.data)
        try:
            if self.is_view_action(action):
                self._view_lock.acquire()
                self._view_ids.add(message.message_id)
                self._view_lock.release()
            message.ack()
        except Exception as e:
            print("ERROR " + str(e))
Example #6
  def publish(self, action):
    data = ActionUtils.encode_action_as_json(action)
    data = data.encode('utf-8')

    message_future = self._publisher.publish(self._topic_path, data=data)
    message_future.add_done_callback(self.callback)
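A minimal usage sketch for this publisher, mirroring the driver script in Example #9 below (the project id, topic name, and CSV path are placeholders):

publisher = ActionPublisher('my-project', 'action-topic')
for action in ActionUtils.parse_from_csv('actions.csv'):
    publisher.publish(action)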
Example #7
def train(json_path, use_previous_dataset=False, use_trained_vae=False):
    # TODO: suppress VAE loading print statements
    params = json.load(open(json_path))[0]

    print("Extracting data...")
    # TODO: save data across more files? and all in some specific folder too
    if not use_previous_dataset:
        img_path_name, action_path_name = extract(params['env_name'],
                                                  params['num_eps'],
                                                  params['max_seq_len'],
                                                  False,
                                                  params['img_size'],
                                                  path=params['dataset_path'])
    else:
        print("Using previously trained dataset.")
        img_path_name, action_path_name = get_path_names(
            params['dataset_path'], params['env_name'], params['num_eps'],
            params['max_seq_len'])
        if (not os.path.isfile(img_path_name + ".npz")
                or not os.path.isfile(action_path_name + ".npz")):
            return print(
                "ERROR: One or more of the previously trained dataset paths "
                "(`{}` or `{}`) does not exist".format(img_path_name,
                                                       action_path_name))
    print("Data extraction finished.")

    vae_path = params['vae_hps']['weights_path']
    if use_trained_vae:
        if not os.path.isfile(vae_path):
            return print(
                "ERROR: No file exists at the VAE model path you passed (`{}`)"
                .format(vae_path))
    # print("Using previously trained VAE.")
    else:
        print("Training VAE...")
        convVae = VAE()
        sys.stdout = open(os.devnull, 'w')
        convVae.make_vae(img_path_name + ".npz", params['latent_size'])
        sys.stdout = sys.__stdout__
        convVae.model_name = vae_path
        convVae.epochs = params['vae_hps']['epochs']
        convVae.train_vae()

    print("Processing images for MDN input...")
    vae_process_images(img_path_name,
                       vae_path,
                       params['latent_size'],
                       decode=False,
                       image_size=params['img_size'])

    print("Formatting MDN training data...")
    latent_path_name = img_path_name + '_latent.npz'
    latent = np.load(latent_path_name)
    act = np.load(action_path_name + '.npz')

    combined_input = []
    combined_output = []

    utils = ActionUtils(params['env_name'])
    action_size = utils.action_size()
    # TODO: Save in batches?
    for f in latent.files:
        c = np.concatenate(
            [latent[f],
             np.array([utils.action_to_input(a) for a in act[f]])],
            axis=1)
        missing = params['max_seq_len'] + 1 - c.shape[0]
        c = np.concatenate(
            [c, np.zeros((missing, params['latent_size'] + action_size))],
            axis=0)
        combined_input.append(c[:-1])
        combined_output.append(c[1:, :-action_size])

    np.save('LunarLander_MDN_in', combined_input)
    np.save('LunarLander_MDN_out', combined_output)

    # MDN Parameters
    # TODO: Change MDN to just take in entire params dictionary
    print("Configuring MDN...")
    mdn_hps = params['mdn_hps']
    mdn_hps['max_seq_len'] = params['max_seq_len']
    mdn_hps['in_width'] = params['latent_size'] + action_size
    mdn_hps['out_width'] = params['latent_size']
    mdn_hps['action_size'] = action_size
    mdn_hps['rnn_size'] = params['hidden_size']

    mdnrnn = MDNRNN(mdn_hps)
    print("Finished building MDN, starting training...")

    mdnrnn.train(np.array(combined_input), np.array(combined_output))
    print("Finished training MDN.")
    mdnrnn.save(params['mdn_hps']['weights_path'])
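A minimal sketch of invoking this training entry point (the JSON config path is a placeholder; the keyword flags follow the signature above):

if __name__ == '__main__':
    # Extract a fresh dataset and train both the VAE and the MDN-RNN.
    train('config/lunar_lander.json',
          use_previous_dataset=False,
          use_trained_vae=False)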
Example #8
class TableQLearnAgent(base_agent.BaseAgent):

	def __init__(self):
		super(TableQLearnAgent, self).__init__()
		self.utils = ActionUtils()
		self.qlearn = QLearningTable(actions=list(range(len(high_actions))))
		self.output_file = 'model/table_q_learn_agent.csv'
		self.output_actions_prefix = 'games/game_actions_%d.csv'
		self.output_game_outcome_file = 'game_state/game_outcome.csv'
		
		if os.path.exists(self.output_game_outcome_file):
			self.game_outcomes = pd.read_csv(self.output_game_outcome_file).to_dict('records')
		else:
			self.game_outcomes = []

		self.game_count = len([f for f in os.listdir('games') if os.path.splitext(f)[1] == ".csv"])
		if os.path.isfile(self.output_file + '.gz'):
			self.qlearn.q_table = pd.read_pickle(self.output_file + '.gz', compression='gzip')

	def reset(self):
		super(TableQLearnAgent, self).reset()
		self.previous_action = None
		self.previous_state = None
		self.queue = deque()
		self.game_actions = []
		self.game_count += 1
		print('Game %d' % (self.game_count))

	def create_state(self, obs):
		command_center_count = len(self.utils.get_units(obs, units.Terran.CommandCenter))
		barracks_count = len(self.utils.get_units(obs, units.Terran.Barracks))
		barracks_tech_lab_count = self.utils.count_units(obs, units.Terran.BarracksTechLab)
		supply_depot_count = len(self.utils.get_units(obs, units.Terran.SupplyDepot))
		refinery_count = len(self.utils.get_units(obs, units.Terran.Refinery))
		eng_bay_count = len(self.utils.get_units(obs, units.Terran.EngineeringBay))
		factory_count = len(self.utils.get_units(obs, units.Terran.Factory))
		factory_tech_lab_count = len(self.utils.get_units(obs, units.Terran.FactoryTechLab))
		army_supply = obs.observation.player.food_army
		worker_supply = obs.observation.player.food_workers
		supply_free = obs.observation.player.food_cap - obs.observation.player.food_used
		vespene = obs.observation.player.vespene

		state_elements = 8
		current_state = np.zeros(8 + 4 + 4)
		current_state[0] = command_center_count
		current_state[1] = supply_depot_count
		current_state[2] = barracks_count
		current_state[3] = refinery_count
		current_state[4] = eng_bay_count
		current_state[5] = factory_count
		current_state[6] = army_supply
		current_state[7] = worker_supply

		# current_state[3] = len(self.utils.get_units(obs, units.Terran.EngineeringBay))
		# current_state[4] = len(self.utils.get_units(obs, units.Terran.Factory))
		# current_state[2] = len(self.utils.get_units(obs, units.Terran.SCV))
		# current_state[3] = obs.observation.player.food_cap
		# current_state[6] = obs.observation.player.food_used / (obs.observation.player.food_cap + 0.01)
		# current_state[4] = obs.observation.player.army_count
		# current_state[0] = self.quantize(obs.observation.player.minerals)
		# current_state[0] = self.quantize(obs.observation.player.vespene)

		hot_squares = np.zeros(4)
		player_relative = obs.observation.feature_minimap.player_relative  
		enemy_y, enemy_x = (player_relative == features.PlayerRelative.ENEMY).nonzero()
		for i in range(0, len(enemy_y)):
			y = int(math.ceil((enemy_y[i] + 1) / 32))
			x = int(math.ceil((enemy_x[i] + 1) / 32))
			
			hot_squares[((y - 1) * 2) + (x - 1)] = 1

		if not self.base_top_left:
			hot_squares = hot_squares[::-1]

		for i in range(0, 4):
			current_state[i + state_elements] = hot_squares[i]

		state_elements += 4

		green_squares = np.zeros(4)        
		friendly_y, friendly_x = (player_relative == features.PlayerRelative.SELF).nonzero()
		for i in range(0, len(friendly_y)):
			y = int(math.ceil((friendly_y[i] + 1) / 32))
			x = int(math.ceil((friendly_x[i] + 1) / 32))
			
			green_squares[((y - 1) * 2) + (x - 1)] = 1

		if not self.base_top_left:
			green_squares = green_squares[::-1]

		for i in range(0, 4):
			current_state[i + state_elements] = green_squares[i]

		excluded_actions = []

		if command_center_count >= 1:
			excluded_actions.append(high_actions.index('BUILD_COMMANDCENTER'))

		if vespene == 0 or barracks_count == 0:
			excluded_actions.append(high_actions.index('BUILD_BARRACKS_TECH_LAB'))

		if vespene == 0 or factory_count == 0:
			excluded_actions.append(high_actions.index('BUILD_FACTORY_TECH_LAB'))

		if factory_count >= 1 or worker_supply == 0:
			excluded_actions.append(high_actions.index('BUILD_FACTORY'))

		if eng_bay_count >= 1 or worker_supply == 0:
			excluded_actions.append(high_actions.index('BUILD_ENGBAY'))

		if refinery_count >= 2 or worker_supply == 0:
			excluded_actions.append(high_actions.index('BUILD_REFINERY'))

		if supply_depot_count >= 5 or worker_supply == 0:
			excluded_actions.append(high_actions.index('BUILD_SUPPLYDEPOT'))

		if supply_depot_count == 0 or barracks_count >= 2 or worker_supply == 0:
			excluded_actions.append(high_actions.index('BUILD_BARRACKS'))

		if supply_free == 0 or barracks_count == 0 or barracks_tech_lab_count == 0:
			excluded_actions.append(high_actions.index('TRAIN_BARRACKS_MARINE'))
			excluded_actions.append(high_actions.index('TRAIN_BARRACKS_REAPER'))

		if supply_free == 0 or barracks_count == 0 or barracks_tech_lab_count == 0 or vespene == 0:
			excluded_actions.append(high_actions.index('TRAIN_BARRACKS_MARAUDER'))

		if supply_free == 0 or factory_count == 0 or factory_tech_lab_count == 0:
			excluded_actions.append(high_actions.index('TRAIN_FACTORY_HELLION'))

		if supply_free == 0 or factory_count == 0 or factory_tech_lab_count == 0 or vespene == 0:
			excluded_actions.append(high_actions.index('TRAIN_FACTORY_TANK'))

		if army_supply == 0:
			for i, a in enumerate(high_actions):
				if a.startswith('ATTACK_'):
					excluded_actions.append(i)
			
		return current_state, excluded_actions

	def step(self, obs):
		super(TableQLearnAgent, self).step(obs)

		# print(obs.observation.score_cumulative.score)
		if obs.last():
			reward = obs.reward

			print('\tGame end: Result: %d' % (reward))
			self.qlearn.learn(str(self.previous_state), self.previous_action, reward, 'terminal')
			self.qlearn.q_table.to_pickle(self.output_file + '.gz', 'gzip')
			self.qlearn.q_table.to_csv(self.output_file)

			if reward == -1:
				result = 'loss'
			elif reward == 0:
				result = 'tie'
			else:
				result = 'win'
			
			self.game_outcomes.append({'game':self.game_count, 'outcome':result, 'score':obs.observation.score_cumulative.score, 'time':datetime.datetime.now()})
			pd.DataFrame(self.game_outcomes).to_csv(self.output_game_outcome_file)
			return self.utils.nothing()

		if obs.first():
			player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
			self.base_top_left = True if player_y.any() and player_y.mean() <= 31 else False

			command_center = self.utils.get_units(obs, units.Terran.CommandCenter)[0]
			self.command_center_loc = (command_center.x, command_center.y)

		if len(self.queue) == 0:
			current_state, excluded_actions = self.create_state(obs)
			# print(current_state.tolist())

			if self.previous_action is not None:
				self.qlearn.learn(str(self.previous_state), self.previous_action, 0, str(current_state))
				
			rl_action = self.qlearn.choose_action(str(current_state), excluded_actions)

			high_action = high_actions[rl_action]
			new_actions = action_mapping[high_action]
			self.queue.extend(new_actions)

			self.game_actions.append({'action':high_action, 'time':datetime.datetime.now(), 'state':current_state.tolist(), 'score':obs.observation.score_cumulative.score})
			pd.DataFrame(self.game_actions).to_csv(self.output_actions_prefix % (self.game_count))

			self.previous_state = current_state
			self.previous_action = rl_action

		valid_action = self.queue.popleft()

		return self.utils.do(obs, valid_action, self.base_top_left, self.command_center_loc)
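A brief sketch of inspecting the Q-table this agent persists between games (the path matches self.output_file above):

import pandas as pd

# Load the gzip-pickled Q-table written by step() at the end of each game.
q_table = pd.read_pickle('model/table_q_learn_agent.csv.gz', compression='gzip')
print(q_table.head())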
Example #9
# NOTE: This excerpt starts mid-way through the CLI setup; an `--input`
# argument (used below as `args.input`) is also defined in the full script.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-p',
    '--project',
    nargs='?',
    required=True,
    help='The Google Cloud Pub/Sub project in which the topic exists.')
parser.add_argument(
    '-t',
    '--topic',
    nargs='?',
    required=True,
    help='The Google Cloud Pub/Sub topic name to which to publish.')
parser.add_argument(
    '-s',
    '--subscription',
    nargs='?',
    required=True,
    help='The Google Cloud Pub/Sub subscriber name on which to subscribe.')

args = parser.parse_args()

subscriber = ActionSubscriber(args.project, args.subscription)

publisher = ActionPublisher(args.project, args.topic)

for a in ActionUtils.parse_from_csv(args.input):
    publisher.publish(a)

time.sleep(10)

print('View action count: {}'.format(subscriber.get_view_count()))
Example #10
class Simulation:
    def __init__(self, path, controller_weights=None):
        self.params = json.load(open(path))[0]
        self.load_model(controller_weights=controller_weights)
        self.env = gym.make(self.params['env_name'])

    def load_model(self, controller_weights=None):
        p = self.params
        self.action_utils = ActionUtils(p['env_name'])
        self.action_size = self.action_utils.action_size()

        self.vae = VAE()
        # Temporarily silence stdout while the VAE graph is built.
        sys.stdout = open(os.devnull, 'w')
        self.vae.make_vae_shape(p['img_size'], p['img_size'], p['latent_size'])
        sys.stdout = sys.__stdout__
        self.vae.load_model('../' + p['vae_hps']['weights_path'])

        # TODO: Make MDN just take in all of params.
        mdn_hps = p['mdn_hps']
        mdn_hps['max_seq_len'] = p['max_seq_len']
        mdn_hps['in_width'] = p['latent_size'] + self.action_size
        mdn_hps['out_width'] = p['latent_size']
        mdn_hps['action_size'] = self.action_size
        mdn_hps['rnn_size'] = p['hidden_size']
        # Override the settings above for single-step inference.
        mdn_hps['batch_size'] = 1
        mdn_hps['max_seq_len'] = 1
        mdn_hps['use_recurrent_dropout'] = 0
        mdn_hps['training'] = 0
        # self.mdn_rnn = MDNRNN(mdn_hps)
        # hps_inf = MDNRNN.set_hps_to_inference(hps)
        self.mdn_rnn = MDNRNN(mdn_hps)
        self.mdn_rnn.load('../' + p['mdn_hps']['weights_path'])

        self.controller = ControllerModel(
            [p['latent_size'] + p['hidden_size'], self.action_size])
        if controller_weights:
            self.controller.load_weights(controller_weights)

    def simulate(self, dreaming=False, render=False):
        rewards = []
        for i in range(1):
            obs = self.env.reset()

            # initialize hidden + action variables
            state = self.mdn_rnn.rnn_init_state()
            a = self.env.action_space.sample()
            h = np.zeros((1, self.params['hidden_size']))
            c = np.zeros((1, self.params['hidden_size']))

            total_reward = 0
            if dreaming:
                img = self.env.render(mode='rgb_array')
                img = compress_image(img, size=self.params['img_size'])
                z = self.vae.encode_image(np.array([img]))[0]
                for t in range(self.params['max_seq_len']):
                    z_current = z.copy()
                    z, state = self.mdn_rnn.sample_z(
                        z_current, self.action_utils.action_to_input(a), state)
                    z = z[0][0]
                    h, c = state[0], state[1]
                    out = self.controller.get_action(
                        np.concatenate((z_current, h[0])))

                    obs, reward, done, info = self.env.step(
                        self.action_utils.output_to_action(out))
                    total_reward += reward
                    if done:
                        print('Episode finished after {} timesteps'.format(t +
                                                                           1))
                        break
            else:
                for t in range(self.params['max_seq_len']):
                    img = self.env.render(mode='rgb_array')
                    img = compress_image(img, size=self.params['img_size'])

                    # compute action
                    z = self.vae.encode_image(np.array([img]))[0]
                    print(a)
                    state = self.mdn_rnn.rnn_next_state(
                        z, self.action_utils.action_to_input(a), state)
                    h, c = state[0], state[1]
                    out = self.controller.get_action(np.concatenate((z, h[0])))

                    a = self.action_utils.output_to_action(out)
                    obs, reward, done, info = self.env.step(a)
                    total_reward += reward
                    if done:
                        print('Episode finished after {} timesteps'.format(t +
                                                                           1))
                        break
            rewards.append(total_reward)
        return -np.mean(rewards)
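A minimal sketch of driving this class (the config path and controller weights file are placeholders; simulate() returns the negated mean episode reward, as above):

sim = Simulation('config/lunar_lander.json',
                 controller_weights='controller_weights.h5')
neg_mean_reward = sim.simulate(dreaming=False, render=False)
print('Mean reward:', -neg_mean_reward)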
Example #11
 def __init__(self, directory, name):
   self.directory = directory
   self.name = name
   self.utils = ActionUtils()
   self.dmps = None