def reset(self):
    """Start a new episode and hand back the initial observation.

    The native field is reset through ``rustLib.field_reset`` and the
    bookkeeping attributes of the previous episode are cleared.

    Returns:
        The result of ``field_to_array`` for the freshly reset field.
    """
    board = self.field
    rustLib.field_reset(board)

    # Clear everything that was accumulated during the previous episode.
    self.invalid_tries = self.amount_limitsurpass = 0
    self.reward_finding_right = False
    self.rewards = []

    return field_to_array(board)
def reset(self):
    """Start a new episode and hand back the initial observation.

    Clears ``prev_reward``, resets the native field and fetches a fresh
    observation via ``self.get_obs()``.

    Returns:
        The observation as returned by ``get_obs``; when ``self.use_cnn`` is
        set, every entry is repeated twice along axis 1 and then twice along
        axis 0, doubling the size along both axes.
    """
    self.prev_reward = 0
    rustLib.field_reset(self.field)

    observation = self.get_obs()
    if not self.use_cnn:
        return observation

    # Upscale by a factor of two: first along axis 1, then along axis 0.
    widened = observation.repeat(2, axis=1)
    return widened.repeat(2, axis=0)
def _pretrain_dict(action_array, observations, rewards, episode_returns, episode_starts):
    """Pack recorded trajectories into the dict layout stored in the .npz files."""
    return {
        'actions': action_array,
        'obs': np.array(observations),
        'rewards': np.array(rewards),
        'episode_returns': np.array(episode_returns),
        # The trailing flag describes a step that was never played, so drop it.
        'episode_starts': np.array(episode_starts[:-1], dtype=bool),
    }


def generate_pretraindata_heuristics(save_interval, amount_of_games, is_box_space,
                                     directory="D:\\4-System\\rusty\\"):
    """Play heuristic-driven games on a native field and record them as pretraining data.

    Every move is chosen by ``rustLib.field_counter_action_index`` and applied
    with ``rustLib.field_do_action_with_answer``. For each move the function
    records the observation the move was chosen from, the action (both as an
    index and as a one-hot vector of length 190) and the reward. The key set of
    the resulting dict looks like the stable-baselines expert-trajectory
    format -- NOTE(review): confirm against the consumer of these files.

    Args:
        save_interval: After every game whose index is a positive multiple of
            this value, both the discrete and the box data set collected so
            far are written to ``directory``. A falsy value (``0``/``None``)
            disables these periodic saves.
        amount_of_games: Number of games to play.
        is_box_space: Selects the ``'actions'`` entry of the returned dict:
            one-hot vectors reshaped to ``boxEnv.action_space.shape`` when
            true, otherwise a column of action indices.
        directory: Folder the periodic ``.npz`` files are written to. Defaults
            to the previously hard-coded location.

    Returns:
        A dict of numpy arrays with the keys ``'actions'``, ``'obs'``,
        ``'rewards'``, ``'episode_returns'`` and ``'episode_starts'``.
        ``'obs'``, ``'rewards'`` and ``'episode_starts'`` hold one entry per
        recorded move; ``'episode_returns'`` holds one entry per game.

    Raises:
        SystemExit: via ``sys.exit(-1)`` when the native library reports that
            the chosen move could not be placed.
    """
    action_count = 190  # length of the one-hot action vector

    actions = []
    boxActions = []
    observations = []
    rewards = []
    episode_returns = []
    # The very first recorded step opens an episode. Afterwards each step
    # appends whether the *next* step starts a new one, which is why the last
    # flag is dropped when the arrays are built.
    episode_starts = [True]
    # Resuming from a previously saved .npz is not implemented, so generation
    # always starts with game 0.
    startAt = 0

    # Accept directories with or without a trailing separator.
    if directory and not directory.endswith(("\\", "/")):
        out_prefix = directory + os.sep
    else:
        out_prefix = directory

    def discrete_dict():
        # Action indices as an (N, 1) column.
        return _pretrain_dict(np.array(actions).reshape((-1, 1)),
                              observations, rewards, episode_returns, episode_starts)

    def box_dict():
        # np.array (unlike np.concatenate) also copes with an empty recording.
        one_hot = np.array(boxActions, dtype=float)
        return _pretrain_dict(one_hot.reshape((-1, ) + boxEnv.action_space.shape),
                              observations, rewards, episode_returns, episode_starts)

    gameField = rustLib.field_new()
    try:
        print("Beginning Games")
        for i in range(startAt, amount_of_games):
            game_over = False
            reward_sum = 0
            while not game_over:
                # Capture the board *before* moving so that each observation is
                # paired with the action that was chosen in that state.
                observation = field_to_array(gameField)
                action = rustLib.field_counter_action_index(gameField)
                answer = rustLib.field_do_action_with_answer(gameField, action, 1)
                if answer.placed != 0:
                    print("Error a block couldn't be placed", action, answer)
                    sys.exit(-1)

                actionArr = np.zeros(action_count, dtype=float)
                actionArr[action] = 1
                boxActions.append(actionArr)
                actions.append(action)
                observations.append(observation)

                # The answer struct uses 0 as the affirmative value for both
                # `placed` (checked above) and `done`.
                game_over = answer.done == 0
                reward = answer.reward
                episode_starts.append(game_over)
                if game_over:
                    # Terminal bonus: +3 when `winner` is 0, -3 otherwise.
                    reward += 3.0 if answer.winner == 0 else -3.0
                rewards.append(reward)
                reward_sum += reward

            episode_returns.append(reward_sum)
            print("DONE WITH GAME NUMBER", i, reward_sum)
            rustLib.field_reset(gameField)

            if save_interval and i > 0 and i % save_interval == 0:
                np.savez(out_prefix + str(i) + "_heutistic_pretrain_discrete.npz",
                         **discrete_dict())
                np.savez(out_prefix + str(i) + "_heutistic_pretrain_box.npz",
                         **box_dict())
    finally:
        # Release the native field even if a game aborts part-way through.
        rustLib.field_free(gameField)

    return box_dict() if is_box_space else discrete_dict()