class GameFrameBuffer:
    """Bounded buffer of the most recent game frames, newest first."""

    def __init__(self, size=5):
        # Maximum number of frames retained at any time.
        self.size = size
        self.frames = []
        self.visual_debugger = VisualDebugger()

    @property
    def full(self):
        """True once the buffer holds at least ``size`` frames."""
        return len(self.frames) >= self.size

    @property
    def previous_game_frame(self):
        """The most recently added frame, or ``None`` when empty."""
        if self.frames:
            return self.frames[0]
        return None

    def add_game_frame(self, game_frame):
        """Prepend ``game_frame``; when already full, drop the oldest frame."""
        retained = self.frames[:-1] if self.full else self.frames
        self.frames = [game_frame, *retained]

    def to_visual_debugger(self):
        """Send every buffered frame to the visual debugger as uint8 image data.

        Frames are assumed to hold float pixel values in [0, 1] — TODO confirm
        against the frame producer.
        """
        for index, buffered in enumerate(self.frames):
            pixels = np.array(buffered.frame * 255, dtype="uint8")
            self.visual_debugger.store_image_data(
                pixels,
                buffered.frame.shape,
                f"frame_{index + 1}"
            )
class T4TFEnv(py_environment.PyEnvironment):
    """TF-Agents environment that trades through an external T4 trading UI.

    Orders are exchanged with the traded application via files in the Wine
    prefix: orders are written to ``input.txt`` and position/P&L is polled
    from ``output.txt``.  Actions: 0 = buy, 1 = sell, 2 = hold.
    """

    def __init__(self, fake=False, metrics_key='001'):
        # Record our PID so external tooling can find/stop this run.
        with open('running', 'w') as f:
            f.write(str(os.getpid()))
        self._episode_ended = False
        self.game = serpent.initialize_game('T4TF1')
        game_frame = self.game.screen_regions['GAME_REGION']
        # Placeholder dimensions/specs; replaced below once a real frame is grabbed.
        self.width = 10
        self.height = 10
        self.state_shape = (int(self.height / 2), int(self.width / 2), 1)
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=self.state_shape, dtype=np.float32, minimum=0.0, name='observation')
        self._state = np.zeros(self.state_shape).astype(np.float32)
        if fake:
            # "fake" mode: build the specs only, never launch the game.
            return
        self.interrupted = False
        self.game.launch()
        self.game.start_frame_grabber()
        self.input_controller = InputController(game=self.game)
        # self.input_proc =
        self.frame_buffer = FrameGrabber.get_frames([0])
        self.frame_buffer = self.extract_game_area(self.frame_buffer)
        self.width = self.frame_buffer[0].shape[1]
        self.height = self.frame_buffer[0].shape[0]
        print('width: %d' % self.width)
        print('height: %d' % self.height)
        # Real observation shape derived from the captured game region.
        self.state_shape = (self.height, self.width, 3)
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=self.state_shape, dtype=np.float32, minimum=0.0, name='observation')
        self._state = np.zeros(self.state_shape).astype(np.float32)
        # print('created input with pid: %s' % self.input_proc.pid)
        self.sell_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_S]
        self.buy_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_B]
        self.step_keys = [KeyboardKey.KEY_LEFT_SHIFT, KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_F]
        self.visual_debugger = VisualDebugger()
        self.scraper = T4Scraper(game=self.game, visual_debugger=self.visual_debugger)
        frame = self.game.grab_latest_frame()
        self.scraper.current_frame = frame
        # Running trade and metrics state.
        self.pl = 0
        self.working_trade = 0
        self.current_action = ''
        self.held = False
        self.fill_count = 0
        self.window_controller = WindowController()
        self.window_id = self.window_controller.locate_window(".*Mini-Dow .*")
        # self.window_id = self.window_controller.locate_window(".*S&P .*")
        self.keys = RedisKeys(metrics_key)
        # self.redis = redis.Redis(port=6001)
        self.number_of_trades = 0
        self.number_of_wins = 0
        self.buys = 0
        self.sells = 0
        self.holds = 0
        self.history = list()
        self.actions = 0
        self.last_action = ''
        self.previous_write = -1
        self.get_metadata()
        self.active_frame = None
        self.start_time = time.time()
        self.step_read_time = 0
        self.step_write_time = 0

    def get_state(self, zeros=False):
        """Return an observation; with ``zeros=True`` return an all-zero state.

        NOTE(review): the non-zero path also builds all-zero frames — it looks
        like a stubbed implementation (frame_history is never fed real frames).
        """
        if zeros:
            return np.zeros(self.state_shape).astype(np.float32)
        self.frame_history = [
            np.zeros((int(self.width / 4), int(self.height / 4))).astype(np.float32),
            np.zeros((int(self.width / 4), int(self.height / 4))).astype(np.float32),
            np.zeros((int(self.width / 4), int(self.height / 4))).astype(np.float32),
            np.zeros((int(self.width / 4), int(self.height / 4))).astype(np.float32)
        ]
        st = np.stack(
            self.frame_history,
            axis=2
        ).astype(np.float32)
        print(st.shape)
        return st

    def is_focused(self):
        """True when the located trading window currently has focus."""
        return self.window_controller.is_window_focused(self.window_id)

    def display_frame(self):
        """Push the latest captured frame to the visual debugger (bucket 2)."""
        frame = self.game.grab_latest_frame()
        self.visual_debugger.store_image_data(
            frame.frame,
            frame.frame.shape,
            2
        )

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        """Start a new episode with a zero observation."""
        print('RESTARTING')
        self._state = np.zeros(self.state_shape).astype(np.float32)
        self._episode_ended = False
        return ts.restart(self._state)

    def update_scraper_frame(self):
        """Refresh the scraper's frame and mirror it to the visual debugger."""
        self.scraper.current_frame = self.game.grab_latest_frame()
        self.visual_debugger.store_image_data(
            self.scraper.current_frame.frame,
            self.scraper.current_frame.frame.shape,
            str(2)
        )

    def stop(self):
        """Shut down helpers and emit a terminal time step."""
        # FIX: input_proc is never assigned in __init__ (its creation is
        # commented out), so guard instead of raising AttributeError here.
        input_proc = getattr(self, 'input_proc', None)
        if input_proc is not None:
            input_proc.kill()
        self.game.stop_frame_grabber()
        return ts.termination(self._state, 0)

    def write_order(self, order_type):
        """Write an order code to the file the trading side polls.

        Codes observed in this class: 0 = buy, 1 = sell, 3 = step forward,
        4 = close/flatten — TODO confirm against the Wine-side reader.
        """
        write_start = time.time()
        # if order_type is self.previous_write: return
        with open('/home/dan/.wine/drive_c/input.txt', 'w') as f:
            f.write('%d' % (order_type))
        self.step_write_time += (time.time() - write_start)
        self.previous_write = order_type

    def step_forward(self):
        """Advance the external simulation by one tick (order code 3)."""
        self.write_order(3)

    def add_to_history(self, frame, action, reward):
        """Persist the frame that produced (action, reward) as a JPEG for replay."""
        history_start = time.time()
        im = Image.fromarray(frame)
        im.save('history/%d_%d_%f.jpg' % (int(datetime.now().timestamp() * 100), action, reward))
        print("history add time: %s seconds" % (time.time() - history_start))

    # def log(self, string):
    #     if self.actions % 200 == 0:
    #         print(string)

    def _step(self, action):
        """Execute one environment step: place/settle an order, then observe.

        Blocks (polling the output file) until any opened position is closed
        by the external system before computing the reward.
        """
        self.step_read_time = 0
        self.step_write_time = 0
        if self.interrupted:
            return self.stop()
        print('----')
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action and
            # start a new episode.
            return self.reset()
        if action == 0:
            # perform buy
            self.current_action = 'buy'
            self.working_trade = True
            # self.input_controller.tap_keys(self.buy_keys, duration=0.001)
            self.write_order(action)
        elif action == 1:
            # perform sell
            self.current_action = 'sell'
            self.working_trade = True
            # self.input_controller.tap_keys(self.sell_keys, duration=0.001)
            self.write_order(action)
        elif action == 2:
            print('hold')
            self.current_action = 'hold'
            # Let the market advance a few ticks while holding.
            y = 0
            while y < 3:
                # self.write_order(action)
                self.step_forward()
                sleep(0.1)
                y = y + 1
        self.actions += 1
        if action < 2:
            self.number_of_trades += 1
            # Wait for the order to fill...
            while not self.has_open_positions():
                sleep(0.01)
                if self.interrupted:
                    return self.stop()
            # ...then step the simulation until the position is closed.
            while self.has_open_positions():
                self.step_forward()
                sleep(0.01)
                if self.interrupted:
                    return self.stop()
            self.write_order(4)
            sleep(0.2)
        reward = self.reward_agent()
        if self.actions > 1:
            self.add_to_history(self.active_frame, action, reward)
        start_grab_frame = time.time()
        self.frame_buffer = FrameGrabber.get_frames([0])
        self.frame_buffer = self.extract_game_area(self.frame_buffer)
        print("frame grab time: %s seconds" % (time.time() - start_grab_frame))
        self.active_frame = self.frame_buffer[0]
        # for i, game_frame in enumerate(self.frame_buffer):
        #     if i >= 3: break
        #     self.visual_debugger.store_image_data(
        #         game_frame,
        #         game_frame.shape,
        #         str(i)
        #     )
        print(self.frame_buffer[0].shape)
        # self.frame_history.insert(0, self.frame_buffer[0])
        # self._states = self.get_state()
        states_shape = self.state_shape
        print(states_shape)
        self._states = np.reshape(
            self.frame_buffer[0],
            (states_shape[0], states_shape[1], states_shape[2])
        ).astype(np.float32)
        print(self._states.shape)
        if self.number_of_trades > 0:
            self.push_metadata(action=self.current_action, reward=reward)
            print('Wins: %d%% - %d / %d' % ((self.number_of_wins / self.number_of_trades * 100), self.number_of_wins, self.number_of_trades))
        # print history
        if len(self.history) > 0:
            historical_wins = len(list(filter(lambda x: x[1] > 0, self.history)))
            print('Wins (last %d): %d%%' % (len(self.history), (historical_wins / len(self.history) * 100)))
        print('Buys: %d' % self.buys)
        print('Sells: %d' % self.sells)
        print('Holds: %d' % self.holds)
        print('Step read time: %s' % self.step_read_time)
        print('Step write time: %s' % self.step_write_time)
        # print(states.shape)
        return ts.transition(self._states, reward=reward, discount=1.0)

    def read_position_and_pl(self):
        """Poll the output file until it contains 'position,pl'; return both as ints."""
        read_start = time.time()
        result = ['', '']
        # FIX: also require two fields before indexing result[1] — a partially
        # written file (no comma yet) used to raise IndexError here.
        while len(result) < 2 or len(result[0]) < 1 or len(result[1]) < 1:
            with open('/home/dan/.wine/drive_c/output.txt', 'r') as f:
                result = [x.strip() for x in f.read().split(',')]
        self.step_read_time += (time.time() - read_start)
        return (int(result[0]), int(result[1]))

    def has_open_positions(self):
        """True while the external system reports a non-zero position."""
        result = self.read_position_and_pl()
        pos = result[0]
        pl = result[1]
        # if self.working_trade:
        #     if pos != 0:
        #         self.working_trade = False
        # else:
        #     if pl != self.pl:
        #         self.working_trade = False
        #         return False
        #     else:
        #         return True
        if pos != 0:
            return True
        return False

    def reward_agent(self):
        """Compute the reward from the change in P&L since the last step."""
        # get pl for last trade
        newPL = self.read_position_and_pl()[1]
        print('old pl: %d' % self.pl)
        print('new pl: %d' % newPL)
        if newPL > self.pl:
            reward = 1.0
        else:
            reward = -1.0
        # FIX: use equality, not identity, for string comparison ('is' on str
        # literals is implementation-dependent interning behavior).
        if self.current_action == 'hold':
            reward = -0.25
        else:
            if reward > 0:
                # Bonus for a winning trade placed right after holding.
                if self.last_action == 'hold':
                    reward = 1.25
        # if reward is 1.0:
        #     if self.current_action is 'buy' and self.buys > self.sells:
        #         reward = 0.8
        #     elif self.current_action is 'sell' and self.sells > self.buys:
        #         reward = 0.8
        # elif reward is -1.0:
        #     if self.current_action is 'sell' and self.buys > self.sells:
        #         reward = -0.8
        #     elif self.current_action is 'buy' and self.sells > self.buys:
        #         reward = -0.8
        self.last_action = self.current_action
        print('REWARD: %f' % reward)
        self.pl = newPL
        return reward

    def extract_game_area(self, frame_buffer):
        """Return a one-element list with the latest quarter-resolution frame."""
        game_area_buffer = []
        # for game_frame in frame_buffer.frames:
        #     game_area = cv.extract_region_from_image(
        #         game_frame.grayscale_frame,
        #         self.game.screen_regions["GAME_REGION"]
        #     )
        frame = frame_buffer.frames[0].quarter_resolution_frame
        # frame = FrameTransformer.rescale(frame_buffer.frames[0].grayscale_frame, 0.5)
        game_area_buffer.append(frame)
        return game_area_buffer

    def get_metadata(self):
        """Restore metrics from redis. Disabled by the early return below."""
        return
        if self.redis.exists(self.keys.trades):
            self.number_of_trades = self.redis.llen(self.keys.trades)
            self.history = list()
            history_strings = self.redis.lrange(self.keys.trades, -100, 100)
            for s in history_strings:
                d = json.loads(s)
                self.history.append((d['action'], float(d['reward'])))
        if self.redis.exists(self.keys.wins):
            self.number_of_wins = int(self.redis.get(self.keys.wins))
        if self.redis.exists(self.keys.buys):
            self.buys = int(self.redis.get(self.keys.buys))
        if self.redis.exists(self.keys.sells):
            self.sells = int(self.redis.get(self.keys.sells))
        if self.redis.exists(self.keys.holds):
            self.holds = int(self.redis.get(self.keys.holds))

    def push_metadata(self, action, reward, reset=False):
        """Record trade metrics to redis. Disabled by the early return below."""
        return
        if action is None:
            return
        # last 100
        # FIX: equality instead of identity for the string comparison.
        if action != 'hold':
            while len(self.history) >= 100:
                self.history.pop(0)
            self.history.append((action, reward))
            obj = {
                'timestamp': str(datetime.now().timestamp()),
                'action': action,
                'reward': str(reward)
            }
            self.redis.rpush(self.keys.trades, json.dumps(obj))
        if action == 'buy':
            self.buys += 1
            self.redis.incr(self.keys.buys)
        elif action == 'sell':
            self.sells += 1
            self.redis.incr(self.keys.sells)
        elif action == 'hold':
            self.holds += 1
            self.redis.incr(self.keys.holds)
        if reward > 0:
            self.number_of_wins += 1
            self.redis.incr(self.keys.wins)
        if reset:
            self.redis.set(self.keys.buys, "0")
            self.redis.set(self.keys.sells, "0")
            self.redis.set(self.keys.wins, "0")
            self.redis.set(self.keys.trades, "0")
            self.redis.set(self.keys.holds, "0")
class DQN:
    """Deep Q-Network agent with prioritized replay (Keras 1.x model API).

    Lifecycle: starts in OBSERVE mode collecting transitions; switches to
    TRAIN after ``observe_steps``; ``enter_run_mode`` freezes exploration.
    """

    def __init__(
        self,
        input_shape=None,
        input_mapping=None,
        replay_memory_size=10000,
        batch_size=32,
        action_space=None,
        max_steps=1000000,
        observe_steps=None,
        initial_epsilon=1.0,
        final_epsilon=0.1,
        gamma=0.99,
        model_file_path=None,
        model_learning_rate=2.5e-4,
        override_epsilon=False
    ):
        self.type = "DQN"
        self.input_shape = input_shape
        self.replay_memory = ReplayMemory(memory_size=replay_memory_size)
        self.batch_size = batch_size
        self.action_space = action_space
        self.action_count = len(self.action_space.combinations)
        self.action_input_mapping = self._generate_action_space_combination_input_mapping(input_mapping)
        self.frame_stack = None
        self.max_steps = max_steps
        # Default: observe for 10% of replay capacity before training starts.
        self.observe_steps = observe_steps or (0.1 * replay_memory_size)
        self.current_observe_step = 0
        self.current_step = 0
        self.initial_epsilon = initial_epsilon
        self.final_epsilon = final_epsilon
        self.previous_epsilon = initial_epsilon
        self.epsilon_greedy_q_policy = EpsilonGreedyQPolicy(
            initial_epsilon=self.initial_epsilon,
            final_epsilon=self.final_epsilon,
            max_steps=self.max_steps
        )
        self.gamma = gamma
        self.current_action = None
        self.current_action_index = None
        self.current_action_type = None
        self.first_run = True
        self.mode = "OBSERVE"
        self.model_learning_rate = model_learning_rate
        self.model = self._initialize_model()
        if model_file_path is not None:
            self.load_model_weights(model_file_path, override_epsilon)
        self.model_loss = 0
        self.visual_debugger = VisualDebugger()

    def enter_train_mode(self):
        """Resume training, restoring the epsilon saved by enter_run_mode."""
        if self.previous_epsilon is not None:
            self.epsilon_greedy_q_policy.epsilon = self.previous_epsilon
            self.previous_epsilon = None
        self.mode = "TRAIN"

    def enter_run_mode(self):
        """Switch to near-greedy evaluation, remembering the current epsilon."""
        self.previous_epsilon = self.epsilon_greedy_q_policy.epsilon
        self.epsilon_greedy_q_policy.epsilon = 0.01
        self.mode = "RUN"

    def next_step(self):
        """Advance the step counter; promote OBSERVE -> TRAIN when due."""
        if self.mode == "TRAIN":
            self.current_step += 1
        elif self.mode == "OBSERVE":
            self.current_observe_step += 1
        if self.mode == "OBSERVE" and self.current_observe_step >= self.observe_steps:
            self.mode = "TRAIN"

    def build_frame_stack(self, game_frame):
        """Initialize the stack by repeating one frame 4x along a new channel axis."""
        frame_stack = np.stack((
            game_frame,
            game_frame,
            game_frame,
            game_frame
        ), axis=2)
        self.frame_stack = frame_stack.reshape((1,) + frame_stack.shape)

    def update_frame_stack(self, game_frame_buffer):
        """Rebuild the stack from the buffer's eighth-resolution grayscale frames."""
        game_frames = [game_frame.eighth_resolution_grayscale_frame for game_frame in game_frame_buffer.frames]
        frame_stack = np.stack(game_frames, axis=2)
        self.frame_stack = frame_stack.reshape((1,) + frame_stack.shape)

    def append_to_replay_memory(self, game_frame_buffer, reward, terminal=False):
        """Store (s, a, r, s', terminal) with a priority equal to its TD error."""
        previous_frame_stack = self.frame_stack
        self.update_frame_stack(game_frame_buffer)
        observation = [
            previous_frame_stack,
            self.current_action_index,
            reward,
            self.frame_stack,
            terminal
        ]
        self.replay_memory.add(self.calculate_target_error(observation), observation)

    def calculate_target_error(self, observation):
        """Return |TD error| for an (s, a, r, s', terminal) observation."""
        previous_target = self.model.predict(observation[0])[0][observation[1]]
        if observation[4]:
            target = observation[2]
        else:
            target = observation[2] + self.gamma * np.max(self.model.predict(observation[3]))
        return np.abs(target - previous_target)

    def pick_action(self, action_type=None):
        """Choose the next action index, at random or greedily from Q-values."""
        if action_type is None:
            self.compute_action_type()
        else:
            self.current_action_type = action_type
        if self.current_action_type == "RANDOM":
            self.current_action_index = random.randrange(self.action_count)
            self.maximum_future_rewards = None
        elif self.current_action_type == "PREDICTED":
            # Run the forward pass only when the Q-values are actually needed.
            qs = self.model.predict(self.frame_stack)
            self.current_action_index = np.argmax(qs)
            self.maximum_future_rewards = qs

    def compute_action_type(self):
        """Set current_action_type per the epsilon-greedy policy."""
        use_random = self.epsilon_greedy_q_policy.use_random()
        self.current_action_type = "RANDOM" if use_random else "PREDICTED"

    def erode_epsilon(self, factor=1):
        """Decay exploration (TRAIN mode only)."""
        if self.mode == "TRAIN":
            self.epsilon_greedy_q_policy.erode(factor=factor)

    def generate_mini_batch(self):
        """Sample a prioritized batch, or None while still observing."""
        if self.mode == "OBSERVE":
            return None
        return self.replay_memory.sample(self.batch_size)

    def train_on_mini_batch(self):
        """Sample a mini-batch and fit the model on each transition."""
        mini_batch = self.generate_mini_batch()
        # FIX: generate_mini_batch returns None in OBSERVE mode; bail out
        # instead of crashing on len(None).
        if mini_batch is None:
            return
        # Visualize a handful of the sampled transitions in the debugger.
        # FIX: cap the sample size so batch_size < 6 cannot raise ValueError.
        flashback_indices = random.sample(range(self.batch_size), min(6, self.batch_size))
        for i in range(0, len(mini_batch)):
            if i in flashback_indices:
                flashback_image = np.squeeze(mini_batch[i][1][3][:, :, :, 1])
                self.visual_debugger.store_image_data(
                    np.array(flashback_image * 255, dtype="uint8"),
                    flashback_image.shape,
                    f"flashback_{flashback_indices.index(i) + 1}"
                )
                del flashback_image
            previous_frame_stack = mini_batch[i][1][0]
            action_index = mini_batch[i][1][1]
            reward = mini_batch[i][1][2]
            frame_stack = mini_batch[i][1][3]
            terminal = mini_batch[i][1][4]
            target = self.model.predict(previous_frame_stack)
            # FIX: predict() returns shape (1, action_count) — index the batch
            # row first (calculate_target_error already does this correctly);
            # target[action_index] raised IndexError for any action_index > 0.
            previous_target = target[0][action_index]
            projected_future_rewards = self.model.predict(frame_stack)
            if terminal:
                target[0][action_index] = reward
            else:
                target[0][action_index] = reward + self.gamma * np.max(projected_future_rewards)
            # Refresh this transition's priority with its new TD error.
            error = np.abs(target[0][action_index] - previous_target)
            self.replay_memory.update(mini_batch[i][0], error)
            self.model.fit(previous_frame_stack, target, epochs=1, verbose=0)

    def generate_action(self):
        """Materialize current_action from the chosen index."""
        self.current_action = self.action_space.combinations[self.current_action_index]

    def get_action_for_index(self, action_index):
        """Return the upper-cased input names mapped to an action index."""
        return [action_input.upper() for action_input in self.action_input_mapping[self.action_space.combinations[action_index]]]

    def get_input_values(self):
        """Return the raw input values for the current action."""
        return self.action_input_mapping[self.current_action]

    def save_model_weights(self, file_path_prefix="datasets/model_", is_checkpoint=False):
        """Save weights; checkpoint names embed the step so they can be resumed."""
        epsilon = self.epsilon_greedy_q_policy.epsilon
        if is_checkpoint:
            file_path = f"{file_path_prefix}_dqn_{self.current_step}_{epsilon}_.h5"
        else:
            file_path = f"{file_path_prefix}_dqn_{epsilon}_.h5"
        self.model.save_weights(file_path, overwrite=True)

    def load_model_weights(self, file_path, override_epsilon):
        """Load weights and recover step/epsilon from the checkpoint filename.

        NOTE(review): the filename parse assumes the *checkpoint* pattern
        ``..._dqn_<step>_<epsilon>_.h5``; non-checkpoint saves will fail the
        int() conversion — confirm callers only pass checkpoint paths.
        """
        self.model.load_weights(file_path)
        self.model.compile(loss="logcosh", optimizer=Adam(lr=self.model_learning_rate, clipvalue=10))
        *args, steps, epsilon, extension = file_path.split("_")
        self.current_step = int(steps)
        if override_epsilon:
            self.previous_epsilon = float(epsilon)
            self.epsilon_greedy_q_policy.epsilon = float(epsilon)

    def output_step_data(self):
        """Print the current mode, step counters, epsilon and loss."""
        if self.mode in ["TRAIN", "OBSERVE"]:
            print(f"CURRENT MODE: {self.mode}")
        else:
            cprint(f"CURRENT MODE: {self.mode}", "grey", "on_yellow", attrs=["dark"])
        print(f"CURRENT STEP: {self.current_step}")
        if self.mode == "OBSERVE":
            print(f"CURRENT OBSERVE STEP: {self.current_observe_step}")
            print(f"OBSERVE STEPS: {self.observe_steps}")
        print(f"CURRENT EPSILON: {round(self.epsilon_greedy_q_policy.epsilon, 6)}")
        print(f"CURRENT RANDOM ACTION PROBABILITY: {round(self.epsilon_greedy_q_policy.epsilon * 100.0, 2)}%")
        print(f"LOSS: {self.model_loss}")

    def _initialize_model(self):
        """Build the 3-tower inception-style Q-network (Keras 1.x layer API)."""
        input_layer = Input(shape=self.input_shape)
        tower_1 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(input_layer)
        tower_1 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_1)
        tower_2 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(input_layer)
        tower_2 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_2)
        tower_2 = Convolution2D(16, 3, 3, border_mode="same", activation="elu")(tower_2)
        tower_3 = MaxPooling2D((3, 3), strides=(1, 1), border_mode="same")(input_layer)
        tower_3 = Convolution2D(16, 1, 1, border_mode="same", activation="elu")(tower_3)
        merged_layer = merge([tower_1, tower_2, tower_3], mode="concat", concat_axis=1)
        output = AveragePooling2D((7, 7), strides=(8, 8))(merged_layer)
        output = Flatten()(output)
        output = Dense(self.action_count)(output)
        model = Model(input=input_layer, output=output)
        model.compile(rmsprop(lr=self.model_learning_rate, clipvalue=1), "mse")
        return model

    def _generate_action_space_combination_input_mapping(self, input_mapping):
        """Flatten each action-space combination into its concrete input values."""
        action_input_mapping = dict()
        for combination in self.action_space.combinations:
            combination_values = self.action_space.values_for_combination(combination)
            input_values = [input_mapping[combination_value] for combination_value in combination_values if combination_value is not None]
            action_input_mapping[combination] = list(itertools.chain.from_iterable(input_values))
        return action_input_mapping
class SerpentBombermanGameAgent(GameAgent):
    """Serpent.AI agent that trains a DQN to play a Bomberman clone.

    Each PLAY frame: detect win/lose sprites, scan the board for sprites,
    feed the transition to the DQN, and tap the chosen key.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.frame_handlers['PLAY'] = self.handle_play
        self.frame_handler_setups['PLAY'] = self.setup_play
        self.value = None
        # Endgame/character sprite templates from the game plugin.
        self.spriteGO = self.game.sprites.get('SPRITE_GAME_OVER')
        self.spriteWO = self.game.sprites.get('SPRITE_GAME_WON')
        self.spriteGirl = self.game.sprites.get('SPRITE_BETTY_0')
        self.printer = TerminalPrinter()
        self.visual_debugger = VisualDebugger()
        self.gamestate = Game()

    def setup_play(self):
        """One-time PLAY setup: inputs, DQN agent, episode counters."""
        game_inputs = {
            "MoveUp": [KeyboardKey.KEY_UP],
            "MoveDown": [KeyboardKey.KEY_DOWN],
            "MoveLeft": [KeyboardKey.KEY_LEFT],
            "MoveRight": [KeyboardKey.KEY_RIGHT],
            "LeaveBomb": [KeyboardKey.KEY_SPACE],
            "None": [0]
        }
        self.game_inputs = game_inputs
        # Index-aligned with the DQN's action indices; None = no-op.
        self.game_actions = [
            KeyboardKey.KEY_UP,
            KeyboardKey.KEY_DOWN,
            KeyboardKey.KEY_LEFT,
            KeyboardKey.KEY_RIGHT,
            KeyboardKey.KEY_SPACE,
            None
        ]
        ##120, 137
        self.dqn_agent = KerasAgent(shape=(104, 136, 1), action_size=len(self.game_actions))
        #load model
        #self.ppo_agent.restore_model()
        self.first_run = True
        ##states trainning
        self.epoch = 1
        self.total_reward = 0
        ##state & action
        self.prev_state = None
        self.prev_action = None
        self.prev_reward = 0
        print("Enter - Auto Save")
        self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
        self.gamestate.restartState()
        time.sleep(2)

    def extract_game_area(self, frame_buffer):
        """Crop each buffered grayscale frame to the game region at 1/4 scale."""
        game_area_buffer = []
        for game_frame in frame_buffer.frames:
            game_area = \
                serpent.cv.extract_region_from_image(game_frame.grayscale_frame, self.game.screen_regions['GAME_REGION'])
            frame = FrameTransformer.rescale(game_area, 0.25)
            game_area_buffer.append(frame)
        print(np.array(game_area_buffer).shape)
        return np.array(game_area_buffer)

    def convert_to_rgba(self, matrix):
        """Append a fully-opaque alpha channel to an RGB pixel matrix."""
        new_matrix = []
        for x in range(0, len(matrix)):
            line = []
            for y in range(0, len(matrix[x])):
                # pixel = (r, g, b); keep the color, force alpha to 255.
                pixel = matrix[x][y]
                new_pixel = [pixel[0], pixel[1], pixel[2], 255]
                line.append(new_pixel)
            new_matrix.append(line)
        return np.array(new_matrix)

    def update_game_state(self, frame):
        """Scan the 15x11 board for sprites and refresh gamestate; return the grid."""
        game_area = \
            serpent.cv.extract_region_from_image(frame, self.game.screen_regions['GAME_REGION'])
        # Board is a 15x11 grid of 32px tiles with an 8px sampling margin.
        game_squares = [[None for j in range(0, 11)] for i in range(0, 15)]
        const_offset = 8
        const = 32
        #game variables
        self.gamestate.bombs = []  #{x, y}
        self.gamestate.enemies = []  #{x,y}
        #force girl to die if not found
        girl_found = False
        for i in range(0, 15):
            for j in range(0, 11):
                izq = ((j + 1) * const - const_offset, (i + 1) * const - const_offset)
                der = ((j + 2) * const + const_offset, (i + 2) * const + const_offset)
                reg = (izq[0], izq[1], der[0], der[1])
                square = serpent.cv.extract_region_from_image(game_area, reg)
                square = self.convert_to_rgba(square)
                sprite_to_locate = Sprite("QUERY", image_data=square[..., np.newaxis])
                sprite = self.sprite_identifier.identify(
                    sprite_to_locate, mode="SIGNATURE_COLORS")
                game_squares[i][j] = sprite
                if ("SPRITE_BETTY" in sprite):
                    self.girl = {"x": i, "y": j}
                    girl_found = True
                elif ("SPRITE_GEORGE" in sprite):
                    self.gamestate.enemies.append({"x": i, "y": j})
                elif ("SPRITE_BOMB" in sprite):
                    self.gamestate.bombs.append({"x": i, "y": j})
        self.gamestate.girl_alive = girl_found
        self.gamestate.done = not girl_found
        return game_squares

    def handle_play(self, game_frame):
        """PLAY frame handler: detect endgame, step the DQN, act on the game."""
        #####################CHECK STATE###########################
        #game over?
        locationGO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteGO.image_data)
        sprite_locator = SpriteLocator()
        locationGO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
        #won game?
        locationWO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteWO.image_data)
        sprite_locator = SpriteLocator()
        locationWO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
        self.gamestate.victory = locationWO != None
        self.gamestate.lose = locationGO != None
        self.gamestate.girl_alive = (locationGO == None and locationWO == None)
        self.gamestate.done = not self.gamestate.girl_alive
        print(f"Is alive? {self.gamestate.girl_alive}")
        print(f"Game over? {self.gamestate.lose}")
        print(f"Won? {self.gamestate.victory}")
        #####################VISUAL DEBUGGER###########################
        # FIX: the loop variable used to be named `game_frame`, shadowing the
        # handler's parameter so all later sprite lookups silently ran against
        # the last buffered frame instead of the frame being handled.
        for i, buffered_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(buffered_frame.frame, buffered_frame.frame.shape, str(i))
        #####################MODEL###########################
        #get buffer
        frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3], frame_type="PIPELINE")
        game_frame_buffer = self.extract_game_area(frame_buffer)
        state = game_frame_buffer.reshape(4, 104, 136, 1)
        if (self.gamestate.done):
            print(f"Game over, attemp {self.epoch}")
            if (self.epoch % 10) == 0:
                print("saving model")
                self.dqn_agent.save_model(
                    f"bombergirl_epoch_{self.epoch}.model")
                self.printer.save_file()
            self.printer.add(
                f"{self.gamestate.victory},{self.gamestate.lose},{self.epoch},{self.gamestate.time},{self.total_reward}"
            )
            self.total_reward = 0
            # Terminal transition, then learn from the finished episode.
            self.dqn_agent.remember(self.prev_state, self.prev_action,
                                    self.prev_reward, state, True)
            self.dqn_agent.replay()
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            self.epoch += 1
            self.total_reward = 0
            self.gamestate.restartState()
            self.prev_state = None
            self.prev_action = None
        else:
            #update time
            self.gamestate.updateTime()
            if (not (self.prev_state is None) and not (self.prev_action is None)):
                self.dqn_agent.remember(self.prev_state, self.prev_action,
                                        self.prev_reward, state, False)
            #do something
            action_index = self.dqn_agent.act(state)
            #get key
            action = self.game_actions[action_index]
            #get random frame from buffer
            game_frame_rand = random.choice(frame_buffer.frames).frame
            #update enviroment accorind to frame
            ###################FUN UPDATE STATE#########################
            game_area = \
                serpent.cv.extract_region_from_image(game_frame_rand, self.game.screen_regions['GAME_REGION'])
            # Same 15x11 grid scan as update_game_state, plus bonus sprites.
            game_squares = [[None for j in range(0, 11)] for i in range(0, 15)]
            const_offset = 8
            const = 32
            #game variables
            self.gamestate.bombs = []  #{x, y}
            self.gamestate.enemies = []  #{x,y}
            #force girl to die if not found
            girl_found = False
            for i in range(0, 15):
                for j in range(0, 11):
                    izq = ((j + 1) * const - const_offset, (i + 1) * const - const_offset)
                    der = ((j + 2) * const + const_offset, (i + 2) * const + const_offset)
                    reg = (izq[0], izq[1], der[0], der[1])
                    square = serpent.cv.extract_region_from_image(
                        game_area, reg)
                    square = self.convert_to_rgba(square)
                    sprite_to_locate = Sprite("QUERY", image_data=square[..., np.newaxis])
                    sprite = self.sprite_identifier.identify(
                        sprite_to_locate, mode="SIGNATURE_COLORS")
                    game_squares[i][j] = sprite
                    if ("SPRITE_BETTY" in sprite):
                        self.girl = {"x": i, "y": j}
                        girl_found = True
                    elif ("SPRITE_GEORGE" in sprite):
                        self.gamestate.enemies.append({"x": i, "y": j})
                    elif ("SPRITE_BOMB" in sprite):
                        self.gamestate.bombs.append({"x": i, "y": j})
                    elif ("SPRITE_BONUSES" in sprite):
                        # NOTE(review): gamestate.bonus is never reset here,
                        # unlike bombs/enemies — confirm this is intended.
                        self.gamestate.bonus.append({"x": i, "y": j})
            #####################CHECK STATE###########################
            #game over?
            locationGO = None
            sprite_to_locate = Sprite("QUERY", image_data=self.spriteGO.image_data)
            sprite_locator = SpriteLocator()
            locationGO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
            #won game?
            locationWO = None
            sprite_to_locate = Sprite("QUERY", image_data=self.spriteWO.image_data)
            sprite_locator = SpriteLocator()
            locationWO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
            self.gamestate.lose = locationGO != None
            self.gamestate.victory = locationWO != None
            self.gamestate.girl_alive = (locationGO == None and locationWO == None)
            self.gamestate.done = not self.gamestate.girl_alive
            print(f"Is alive? {self.gamestate.girl_alive}")
            print(f"Game over? {self.gamestate.lose}")
            print(f"Won? {self.gamestate.victory}")
            ###################REWARD#########################
            #get reward
            reward = self.gamestate.getReward(action_index)
            self.total_reward += reward
            self.prev_state = state
            self.prev_action = action_index
            self.prev_reward = reward
            if (action):
                # Movement keys get a longer press than dropping a bomb.
                self.input_controller.tap_key(
                    action, 0.15 if action_index < 4 else 0.01)
            print(
                f"Action: {self.gamestate.game_inputs[action_index]}, reward: {reward}, total_reward: {self.total_reward}"
            )

    def check_game_state(self, game_frame):
        """Detect endgame sprites on ``game_frame`` and update gamestate flags."""
        #game over?
        locationGO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteGO.image_data)
        sprite_locator = SpriteLocator()
        locationGO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
        print("Location Game over:", locationGO)
        #won game?
        locationWO = None
        sprite_to_locate = Sprite("QUERY", image_data=self.spriteWO.image_data)
        sprite_locator = SpriteLocator()
        # FIX: was passed `game_frame=game_frame.frames`; every other call
        # site passes the single frame object, not a frames collection.
        locationWO = sprite_locator.locate(sprite=sprite_to_locate, game_frame=game_frame)
        print("Location Game won:", locationWO)
        self.gamestate.girl_alive = (locationGO == None and locationWO == None)
        self.gamestate.done = not self.gamestate.girl_alive
        # FIX: `lose` was printed below but never assigned in this method,
        # so the printout showed a stale value; set it like handle_play does.
        self.gamestate.lose = locationGO != None
        self.gamestate.victory = locationWO != None
        print(f"Is alive? {self.gamestate.girl_alive}")
        print(f"Game over? {self.gamestate.lose}")
        print(f"Won? {self.gamestate.victory}")