def play(env, agent, config, evaluation=False):
    """Play batched episodes with ``agent`` in ``env``, training unless evaluating.

    Each episode resets the environment and steps every game of the batch
    until all of them report done. When not evaluating, every transition of a
    game that was still running is handed to ``agent.add_state`` and
    ``agent.train()`` is called every ``config['update_frequency']`` steps.
    After each episode a summary line is printed and ``agent.end_episode()``
    is called.

    Args:
        env: Batched environment. ``reset()`` must return
            ``(observations, infos)`` and ``step(actions)`` must return
            ``(observations, rewards, dones, infos)``.
        agent: Object providing ``choose_actions``, ``add_state``, ``train``
            and ``end_episode``.
        config: Mapping read for the keys ``eval_episodes``,
            ``train_episodes``, ``environment_batch_size``,
            ``use_adaptive_epsilon`` and ``update_frequency``.
        evaluation: If true, run ``config['eval_episodes']`` episodes without
            storing transitions, training, or early stopping; otherwise run
            up to ``config['train_episodes']`` training episodes.

    Returns:
        None.
    """
    if evaluation:
        print('\nEvaluation:')

    num_episodes = config['eval_episodes'] if evaluation else config['train_episodes']
    batch_size = config['environment_batch_size']

    max_reward = 0
    max_mean_rewards = 0

    for _ in tqdm.tqdm(range(num_episodes)):
        observations, infos = env.reset()
        infos_array = dict_to_array(infos, batch_size)
        rewards = [0] * batch_size
        dones = [False] * batch_size
        max_score = max(info['max_score'] for info in infos_array)
        steps = 0

        # TODO: maybe condition on max_steps as well.
        while not all(dones):
            # NOTE(review): raises ZeroDivisionError if max_score is 0 while
            # adaptive epsilon is enabled.
            win_factor = (max_mean_rewards / float(max_score)
                          if config['use_adaptive_epsilon'] else None)
            actions = agent.choose_actions(observations, infos_array, dones,
                                           evaluation, win_factor=win_factor)
            new_observations, new_rewards, new_dones, new_infos = env.step(actions)
            new_infos_array = dict_to_array(new_infos, batch_size)

            if not evaluation:
                for idx, done in enumerate(dones):
                    if done:
                        # This game finished on an earlier step; nothing new
                        # to store for it.
                        continue
                    # The reward handed to the agent is the change since the
                    # previous step. The terminal flag must come from
                    # new_dones: dones[idx] is always False on this path, so
                    # passing it would never mark a transition as terminal.
                    agent.add_state(observations[idx], infos_array[idx],
                                    actions[idx], new_observations[idx],
                                    new_infos_array[idx],
                                    new_rewards[idx] - rewards[idx],
                                    new_dones[idx])

            observations = new_observations
            infos_array = new_infos_array
            rewards = new_rewards
            dones = new_dones

            if not evaluation and steps % config['update_frequency'] == 0:
                agent.train()
            steps += 1

        mean_rewards = np.mean(rewards)
        max_reward = max(max_reward, max(rewards))
        max_mean_rewards = max(max_mean_rewards, mean_rewards)
        wins_percentage = (sum(info['has_won'] for info in infos_array)
                           * 100. / len(infos_array))
        print('Mean rewards: {}({}), steps: {}, max reward: {}({}), wins percentage - {}'.format(
            mean_rewards, max_mean_rewards, steps, max_reward, max_score, wins_percentage))
        agent.end_episode()

        # Stop training early once the batch mean exceeds 99% of the best
        # achievable score.
        if not evaluation and mean_rewards > 0.99 * max_score:
            break
def __group_by_field(self, i, field):
    '''Tally test-index subset i along one field axis (SNP=0, sample=1).

    Returns an integer array whose length is the genotype data's extent
    along ``field``. The entries selected by the 'k' field of the grouped
    result receive the matching 'v' values; every other entry stays 0.
    '''
    axis_length = self.problem.genotype.data.shape[field]
    selected = self.test_index[field][i]
    # presumably group_by_value maps each distinct value to its count, and
    # dict_to_array turns that into a structured array with 'k'/'v' fields
    # -- TODO confirm against util/statutil.
    tally = util.dict_to_array(statutil.group_by_value(selected))
    per_index = np.zeros((axis_length,), dtype=int)
    per_index[tally['k']] = tally['v']
    return per_index
def __group_by_field(self, i, field):
    '''Spread the grouping of test-index subset i over a field axis.

    ``field`` selects the axis (SNP=0, sample=1). The output is an integer
    vector as long as the genotype data along that axis, zero everywhere
    except at the positions named by the grouped array's 'k' field, which
    hold the corresponding 'v' values.
    '''
    extent = self.problem.genotype.data.shape[field]
    subset = self.test_index[field][i]
    grouped = statutil.group_by_value(subset)
    # NOTE(review): relies on dict_to_array yielding an array addressable by
    # the field names 'k' and 'v' -- confirm in util.
    pairs = util.dict_to_array(grouped)
    counts = np.zeros((extent,), dtype=int)
    counts[pairs['k']] = pairs['v']
    return counts
def __group_by_field(g, i, field):
    '''Tally index subset i along one field axis of g (SNP=0, sample=1).

    Returns an integer array of length ``g.shape[field]``. The entries
    selected by the 'k' field of the grouped result receive the matching
    'v' values; every other entry stays 0.
    '''
    selected = i[field]
    # presumably group_by_value maps each distinct value to its count, and
    # dict_to_array turns that into a structured array with 'k'/'v' fields
    # -- TODO confirm against util/statutil.
    tally = util.dict_to_array(statutil.group_by_value(selected))
    per_index = np.zeros((g.shape[field],), dtype=int)
    per_index[tally['k']] = tally['v']
    return per_index
def __group_by_field(g, i, field):
    '''Spread the grouping of index subset i over a field axis of g.

    ``field`` selects the axis (SNP=0, sample=1). The output is an integer
    vector with ``g.shape[field]`` entries, zero everywhere except at the
    positions named by the grouped array's 'k' field, which hold the
    corresponding 'v' values.
    '''
    grouped = statutil.group_by_value(i[field])
    # NOTE(review): relies on dict_to_array yielding an array addressable by
    # the field names 'k' and 'v' -- confirm in util.
    pairs = util.dict_to_array(grouped)
    extent = g.shape[field]
    counts = np.zeros((extent,), dtype=int)
    counts[pairs['k']] = pairs['v']
    return counts