class NNAgentSigmoid(Agent):
    """Agent that chooses actions by scoring them with a ``NeuralNetworkSigmoid``.

    A candidate action is encoded as a fixed-width bit string, appended to the
    observation bit string and passed through the network.  The forward result
    that belongs to the action finally chosen is kept in ``self.activations``
    so that ``train`` can hand it to the network's backward pass.
    """

    # Neither attribute is read inside this class.
    # NOTE(review): presumably consumed by the base class or external code -- confirm.
    data_dir = Utility.get_data_path()
    learning_rate = 0.01

    def __init__(self, environment: Environment, seed: int, hidden_size: [int], activation_name: str = "relu"):
        """Build the network sized for ``environment``.

        Args:
            environment: Supplies ``observation_length``, ``action_length`` and
                ``reward_length``, which fix the input and output layer sizes.
            seed: Forwarded unchanged to the base-class constructor.
            hidden_size: Sizes of the hidden layers, placed between the input
                and output sizes in the layer list.
            activation_name: Forwarded unchanged to ``NeuralNetworkSigmoid``.
        """
        super().__init__(environment, seed)
        env = self.environment
        # Input layer takes observation bits followed by action bits.
        n_inputs = env.observation_length + env.action_length
        n_outputs = env.reward_length
        layer_sizes = [n_inputs] + hidden_size + [n_outputs]
        self.nn = NeuralNetworkSigmoid(activation_name, layer_sizes)
        # Placeholder until the first forward pass has been stored.
        self.activations = ([], [])

    def _forward_for_action(self, observation: str, action_idx):
        """Encode ``action_idx`` and run one forward pass for it.

        Returns:
            A pair ``(action_string, nn_output)``: the zero-padded binary
            encoding of the action and whatever ``self.nn.forward`` returned.
        """
        width = self.environment.action_length
        action_string = f"{action_idx:b}".zfill(width)
        nn_output = self.nn.forward(
            NNUtility.bitstr_to_narray(observation + action_string)
        )
        return action_string, nn_output

    def calculate_action(self, observation: str) -> str:
        """Choose an action for ``observation`` and remember its forward result.

        When ``seeded_rand_range(0, 10)`` yields 0 a single randomly drawn
        action is used; otherwise every action is evaluated and the one with
        the highest decoded reward is kept.
        NOTE(review): presumably a 1-in-10 exploration step -- confirm the
        range semantics of ``seeded_rand_range``.

        Args:
            observation: Observation as a bit string.

        Returns:
            The chosen action as a bit string of ``action_length`` characters,
            or ``""`` if no evaluated action was ever adopted.
        """
        action_count = 2 ** self.environment.action_length

        if self.seeded_rand_range(0, 10) == 0:
            # Random branch: one draw, one forward pass, no comparison.
            drawn_idx = self.seeded_rand_range(0, action_count)
            chosen, result = self._forward_for_action(observation, drawn_idx)
            self.activations = result
            return chosen

        best_action = ""
        # Starting threshold a candidate has to reach before it can be adopted.
        # NOTE(review): presumably lower than any reward
        # Utility.get_reward_from_bitstring can produce -- confirm.
        best_reward = -2
        for candidate_idx in range(action_count):
            candidate, result = self._forward_for_action(observation, candidate_idx)
            # result[1][-1] is decoded to a reward bit string.
            # NOTE(review): looks like the output-layer activation -- confirm
            # against NeuralNetworkSigmoid.forward.
            predicted = Utility.get_reward_from_bitstring(
                NNUtility.narray_to_bitstr(result[1][-1])
            )
            if predicted == best_reward:
                # Tie: the candidate replaces the incumbent only when the
                # argument-less draw equals 1.
                if self.seeded_rand_range() == 1:
                    best_action = candidate
                    self.activations = result
            elif predicted > best_reward:
                best_action = candidate
                best_reward = predicted
                self.activations = result
        return best_action

    def train(self, reward: str):
        """Run the network's backward pass for the last stored forward result.

        Args:
            reward: Received reward as a bit string; it is converted with
                ``NNUtility.bitstr_to_narray`` before being passed on.
        """
        target = NNUtility.bitstr_to_narray(reward)
        self.nn.backward(self.activations, target)
import pickle import matplotlib.pyplot as plt from python.src import Utility from matplotlib import rc rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 12}) rc('text', usetex=True) plt.rcParams['axes.axisbelow'] = True data_dir_path = Utility.get_data_path() plots_path = Utility.get_plots_path() apiq_dict_path = data_dir_path.joinpath("apiq_dict.apiq") apiq_dict = pickle.load(apiq_dict_path.open("rb")) # bar plot of apiq values f = plt.figure(figsize=(6, 4), dpi=400) agents = list(apiq_dict.keys()) apiq_values = [v["mean"] for v in apiq_dict.values()] apiq_errors = [v["error"] for v in apiq_dict.values()] ypos = [len(agents) - 1 - y for y in range(len(agents))] plt.barh(ypos[:4], apiq_values[:4], color="#076678") plt.barh(ypos[4:5], apiq_values[4:5], color="#689d6a") plt.barh(ypos[5:], apiq_values[5:], color="#8f3f71") plt.errorbar(apiq_values, ypos, xerr=apiq_errors, fmt=',', ecolor='black', capsize=4) plt.yticks( ypos,