def evaluate(model: str, output_csv: str, bat_speed: int, shape_file: str = SHAPE_FILE):
    """Run ``N_EPISODE`` evaluation episodes, appending each one's samples to a CSV log.

    Every episode builds a fresh ``Dqn`` (restored from ``model`` when that
    file exists), re-reads ``output_csv`` to pick the next experiment number,
    drives a ``Game`` until it reports ``'GOAL'`` or ``'HIT_TREE'`` or until
    ``N_MOVES`` updates have been made, and finally writes the log back with
    the rows of ``game.state.sample`` appended.

    Args:
        model: Path of a saved model; loaded only if the file exists.
        output_csv: Path of the CSV log. Created on the first episode when it
            does not exist yet.
        bat_speed: Forwarded unchanged to ``Game``.
        shape_file: Forwarded unchanged to ``Game``.
    """
    terminal_results = ('GOAL', 'HIT_TREE')

    for episode in range(N_EPISODE):
        random.seed(random.randint(0, 1000))
        print("Episode : {}".format(episode))
        print("S : {}".format(bat_speed))

        brain = Dqn(5, 2 * ANGLE_RANGE + 1, GAMMA)
        if path.exists(model):
            brain.load(model_file=model)

        if path.exists(output_csv):
            data = pd.read_csv(output_csv)
        else:
            columns = [
                'experiment', 'time', 'speed', 'gamma', 'signal1', 'signal2',
                'signal3', 'distance_to_goal', 'action', 'orientation', 'reward'
            ]
            if ADDING_OBS:
                # One extra column per angle in [-ANGLE_RANGE, ANGLE_RANGE].
                columns.extend(
                    'angle_' + str(angle)
                    for angle in range(-ANGLE_RANGE, ANGLE_RANGE + 1)
                )
            data = pd.DataFrame(columns=columns)

        # A log without rows (new file, or a CSV holding only its header) has
        # no previous experiment; max() over it would be NaN, so start at 1.
        if data.shape[0] == 0:
            experiment_number = 1
        else:
            experiment_number = data['experiment'].max() + 1

        # NOTE(review): training_mode is True whenever ADDING_OBS is falsy,
        # even though this is the evaluation path - confirm that is intended.
        game = Game(model=brain,
                    experiment_number=experiment_number,
                    bat_speed=bat_speed,
                    training_mode=not ADDING_OBS,
                    shape_file=shape_file)
        print("Size : {}".format(game.state.sand.shape))

        # Always performs at least one update, then stops on a terminal
        # result or once N_MOVES updates have been made.
        n_actions = 0
        while True:
            game.update()
            n_actions += 1
            if game.last_action() in terminal_results or n_actions >= N_MOVES:
                break

        print("Save data")
        data = pd.concat([data, pd.DataFrame(game.state.sample)])
        data.to_csv(output_csv, index=False)
def __init__(self, model: str, bat_speed: int):
    """Initialise the game with a fixed 500x500 size and a fresh ``State``.

    Args:
        model: Path of a saved brain; loaded into the new ``Dqn`` only when
            the file exists.
        bat_speed: Stored on the state as ``bat_speed``.
    """
    super(Game, self).__init__()
    self.height = 500
    self.width = 500

    # Maps an action index to a rotation in [-ANGLE_RANGE, ANGLE_RANGE].
    self.action2rotation = list(range(-ANGLE_RANGE, ANGLE_RANGE + 1))

    self.state = state = State()
    state.bat_speed = bat_speed

    # The second Dqn argument equals the number of entries in action2rotation.
    n_rotations = 2 * ANGLE_RANGE + 1
    state.brain = Dqn(5, n_rotations, GAMMA)

    saved_brain_available = path.exists(model)
    if saved_brain_available:
        print("Loading brain")
        state.brain.load(model)

    state.experiment = 1
def __init__(self, **kwargs):
    """Initialise the game with a fixed 500x500 size and a fresh ``State``.

    The brain is restored from ``MODEL_FILE`` when that file exists.

    Args:
        **kwargs: Accepted for call compatibility; the visible body neither
            reads them nor forwards them to the parent initialiser.
    """
    super(Game, self).__init__()
    self.height = 500
    self.width = 500

    # Maps an action index to a rotation in [-ANGLE_RANGE, ANGLE_RANGE].
    self.action2rotation = list(range(-ANGLE_RANGE, ANGLE_RANGE + 1))

    self.state = state = State()

    # The second Dqn argument equals the number of entries in action2rotation.
    n_rotations = 2 * ANGLE_RANGE + 1
    state.brain = Dqn(5, n_rotations, GAMMA)

    saved_brain_available = path.exists(MODEL_FILE)
    if saved_brain_available:
        state.brain.load(MODEL_FILE)

    state.experiment = 1
# Script configuration.
# NOTE(review): the name is spelled MODLE_FILE (sic); it is kept unchanged
# because code outside this view may reference it.
MODLE_FILE = 'base_brain.pth'   # saved model loaded at the start of each episode
OUTPUT_CSV = 'eval.csv'         # per-experiment log read at the start of each episode
N_EPISODE = 300                 # number of loop iterations below
N_MOVES = 1500

if __name__ == '__main__':
    for i in range(N_EPISODE):
        random.seed(random.randint(0, 1000))
        print("Episode : {}".format(i))

        # Build the model and restore saved weights when a model file exists.
        brain = Dqn(5, 2 * ANGLE_RANGE + 1, GAMMA)
        if path.exists(MODLE_FILE):
            brain.load(MODLE_FILE)

        # Load the existing log, or start an empty one with the expected columns.
        # NOTE(review): experiment_number is only assigned when the CSV already
        # exists, and nothing visible here consumes brain/data afterwards -
        # confirm whether the rest of this loop body lives elsewhere.
        if not path.exists(OUTPUT_CSV):
            columns = [
                'experiment', 'time', 'speed', 'gamma', 'signal1', 'signal2',
                'signal3', 'distance_to_goal', 'action', 'orientation', 'reward'
            ]
            data = pd.DataFrame(columns=columns)
        else:
            data = pd.read_csv(OUTPUT_CSV)
            experiment_number = data['experiment'].max() + 1
def training(model_file: str,
             output_csv: str,
             bat_speed: int,
             num_episodes: int = N_EPISODE,
             moves_per_episode: int = N_MOVES,
             update_only_better_reward: bool = True,
             shap_file: str = 'shape'):
    """Train for ``num_episodes`` episodes, persisting the log and the model.

    Every episode builds a fresh ``Dqn`` (restored from ``model_file`` when it
    exists), re-reads ``output_csv`` to pick the next experiment number, runs
    ``moves_per_episode`` game updates in training mode, and then decides
    whether to append the episode's samples to the log and save the model.

    Args:
        model_file: Path the model is loaded from (if present) and saved to.
        output_csv: Path of the CSV log; created on the first saved episode.
        bat_speed: Forwarded unchanged to ``Game``.
        num_episodes: Number of episodes to run.
        moves_per_episode: Number of ``game.update()`` calls per episode.
        update_only_better_reward: When true, an episode is saved only if its
            summed reward is strictly greater than the summed reward of the
            most recently logged experiment; when false, every episode is
            saved.
        shap_file: Forwarded to ``Game`` as ``shape_file``.
    """
    for episode in range(num_episodes):
        print("Episode : {}".format(episode))

        # Build the model and restore saved weights when available.
        brain = Dqn(5, 2 * ANGLE_RANGE + 1, GAMMA)
        if path.exists(model_file):
            brain.load(model_file)

        # Load the existing log, or start an empty one.
        if path.exists(output_csv):
            data = pd.read_csv(output_csv)
        else:
            columns = [
                'experiment', 'time', 'speed', 'gamma', 'signal1', 'signal2',
                'signal3', 'distance_to_goal', 'action', 'orientation', 'reward'
            ]
            data = pd.DataFrame(columns=columns)

        # A log without rows (new file, or a CSV holding only its header) has
        # no previous experiment; max() over it would be NaN, so start at 1.
        has_history = data.shape[0] > 0
        experiment_number = data['experiment'].max() + 1 if has_history else 1

        game = Game(model=brain,
                    experiment_number=experiment_number,
                    bat_speed=bat_speed,
                    training_mode=True,
                    shape_file=shap_file)

        # Each update adds one row of data to game.state.sample.
        for _ in range(moves_per_episode):
            game.update()

        new_df = pd.DataFrame(game.state.sample)

        if update_only_better_reward:
            if has_history:
                previous = data[data['experiment'] == (experiment_number - 1)]
                current_max_cumulative_reward = previous.reward.sum()
            else:
                # Nothing to beat yet: any first episode must be accepted,
                # however low its reward, so use -inf rather than a magic floor.
                current_max_cumulative_reward = float('-inf')

            sum_reward = new_df['reward'].sum()
            print('Sum reward {}'.format(sum_reward))
            print("Current max {}".format(current_max_cumulative_reward))

            # Written as "not >" so a NaN sum is still rejected.
            if not sum_reward > current_max_cumulative_reward:
                continue

        print("Save data")
        data = pd.concat([data, new_df])
        data.to_csv(output_csv, index=False)
        print('Save model')
        game.state.brain.save(model_file)