def get_all_graphs_infos(cls):
    """Load every serialized state graph in FOLDER_GRAPH and print its stats.

    Only the top level of FOLDER_GRAPH is scanned (the walk is cut short
    after the first directory); each ``*.gz`` file is treated as one graph.
    """
    for (_, _, filenames) in os.walk(FOLDER_GRAPH):
        for filename in filenames:
            # endswith() matches only a true .gz suffix; the previous
            # `'.gz' in filename` + replace() would also hit names like
            # 'foo.gz.bak' and strip interior '.gz' occurrences.
            if filename.endswith('.gz'):
                graph_name = filename[:-len('.gz')]  # Remove .gz extension.
                # Graph files are named '<domain>-...'; take the domain part.
                domain_name = filename.split('-', 1)[0]
                options_list = [
                    '--domain', domain_name,
                    '--graph', graph_name
                ]
                exp_params = ExpParams.get_exp_params(options_list)
                g = StateGraph.load(exp_params)
                g.print_stats()
        # Stop after the top-level folder: do not recurse into
        # subdirectories (also ensures every .gz file is processed,
        # not just the first one).
        break
def probe_network(self): exp_params = ExpParams.get_exp_params_from_command_line_args() graph = exp_params.state_class.GAME_GRAPH print "Network predictions:" self.network_predictions = {} # Network predictions. true_values = { } # True values obtained from the graph using value iteration. for state_roll_action_str in sorted(self.network_inputs.iterkeys()): # state_value = self.network_outputs[state_str] state_roll_action_value = self.network.activate( self.network_inputs[state_roll_action_str]) self.network_predictions[ state_roll_action_str] = state_roll_action_value node_id = graph.get_node_id( state_roll_action_str[:-4]) # Removes roll and action. true_value = graph.get_attr(node_id, VAL_ATTR) true_values[state_roll_action_str] = true_value # print "%s -> %s (%.2f)" % (state_str, state_value, abs_value) for (si, ai), _ in sorted(self.visit_count.iteritems(), key=lambda (k, v): (v, k)): state_roll_action_str = '%s-%s' % (si, ai) true_value = true_values[state_roll_action_str] # Reward for white win is [1, 0], # Reward for black win is [0, 1], # state_value[0] - state_value[1] ranges from -1 to +1, although # it can exceed those bounds when the network outputs are # outside the range [0, 1]. # The following formula is meant to scale the difference to range [0, 1]. print "(%s, %s): opt. val. for white: %+.2f prediction: %s visited: %d" % ( si, ai, true_value, map(PrettyFloat, self.network_predictions[state_roll_action_str]), self.visit_count.get((si, ai), 0)) print( 'Note: optimal values for white are based on the board ' 'positions only and ignore the current roll.')
reverse=True) for traj, cnt in sorted_traj_count: print "%s: %d" % (traj, cnt) # Reset after each query. self.traj_count = {} def print_learner_state(self): self.print_visit_count() self.print_e() self.probe_network() self.print_traj_counts() if __name__ == '__main__': make_data_folders() exp_params = ExpParams.get_exp_params_from_command_line_args() filename = exp_params.get_trial_filename(FILE_PREFIX_NTD) f = open(filename, 'w') agent_ntd = AgentNTD(exp_params.state_class) if TRAIN_BUDDY == TRAIN_BUDDY_SELF: agent_train_buddy = agent_ntd elif TRAIN_BUDDY == TRAIN_BUDDY_COPY: agent_train_buddy = AgentNTD(exp_params.state_class) elif TRAIN_BUDDY == TRAIN_BUDDY_RANDOM: agent_train_buddy = AgentRandom(exp_params.state_class) agent_eval = Experiment.create_eval_opponent_agent(exp_params) print 'Training buddy is: %s' % agent_train_buddy print 'Evaluation opponent is: %s' % agent_eval
def get_knowledge_filename(cls):
    """Return the trial-independent path of the SARSA Q-table file,
    derived from the command-line experiment parameters."""
    cli_params = ExpParams.get_exp_params_from_command_line_args()
    return cli_params.get_custom_filename_no_trial(FOLDER_QTABLE,
                                                   FILE_PREFIX_SARSA)