def __init__(self, args):
    """ Agent's Parameters """
    self.n = args.n
    self.sigma = args.sigma
    self.beta = args.beta
    self.number_of_tilings = args.tilings
    self.alpha = np.float64(args.alpha) / self.number_of_tilings

    """ Experiment Configuration """
    self.config = Config()
    self.summary = {}   # self.summary will contain the following keys: return_per_episode, steps_per_episode
    self.config.save_summary = True

    " Environment Parameters "
    self.config.max_actions = 100000
    self.config.num_actions = 3     # Number of actions in Mountain Car
    self.config.obs_dims = [2]      # Dimensions of the observations experienced by the agent

    " TileCoder Parameters "
    self.config.num_tilings = self.number_of_tilings
    self.config.tiling_side_length = 8
    self.config.num_dims = 2
    self.config.alpha = self.alpha
    self.config.state_space_range = np.array([0.5 + 1.2, 0.07 + 0.07], dtype=np.float64)

    " Policies Parameters "
    self.config.target_policy = Config()
    self.config.target_policy.initial_epsilon = 0.1
    self.config.target_policy.anneal_epsilon = False
    self.config.target_policy.annealing_period = 0
    self.config.target_policy.final_epsilon = 0.1
    self.config.anneal_steps_count = 0

    " QSigma Agent "
    self.config.n = self.n
    self.config.gamma = 1
    self.config.beta = self.beta
    self.config.sigma = self.sigma
    self.config.use_er_buffer = False
    self.config.initial_rand_steps = 0
    self.config.rand_steps_count = 0

    " Environment "
    self.env = MountainCliff(config=self.config, summary=self.summary)

    """ Policies """
    self.target_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=False)

    """ TileCoder """
    self.function_approximator = TileCoderFA(self.config)

    """ RL Agent """
    self.agent = QSigma(function_approximator=self.function_approximator, target_policy=self.target_policy,
                        behaviour_policy=self.target_policy, environment=self.env, config=self.config,
                        summary=self.summary)
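# Hedged note (added commentary, not in the original source): dividing args.alpha by
# the number of tilings is the usual tile-coding convention. Each update touches one
# active tile per tiling, so the total step size of an update stays args.alpha no
# matter how many tilings are used; e.g. args.alpha = 0.5 with 8 tilings gives a
# per-tile step of 0.5 / 8 = 0.0625.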
def __init__(self, experiment_arguments, dir_name):
    homepath = "/home/jfernando/"
    self.games_directory = homepath + "PycharmProjects/RL_Experiments/Experiments_Engine/Environments/" \
                                      "Arcade_Learning_Environment/Supported_Roms/"
    self.rom_name = "seaquest.bin"
    self.optimizer = lambda lr: tf.train.RMSPropOptimizer(lr, decay=0.95, momentum=0, epsilon=0.01,
                                                          centered=True)
    # self.optimizer = tf.train.GradientDescentOptimizer
    self.sess = tf.Session()

    if experiment_arguments.restore_agent:
        with open(os.path.join(dir_name, 'experiment_config.p'), mode='rb') as experiment_config_file:
            self.config = pickle.load(experiment_config_file)
        with open(os.path.join(dir_name, 'summary.p'), mode='rb') as summary_file:
            self.summary = pickle.load(summary_file)
        self.config.display_screen = False
    else:
        self.config = Config()
        self.summary = {'frames_per_episode': [], 'return_per_episode': [], 'cumulative_loss': [],
                        'training_steps': []}
        self.config.save_summary = True

        """ Environment Parameters """
        self.config.display_screen = False
        self.config.frame_skip = 4
        self.config.agent_render = False
        self.config.repeat_action_probability = 0.25
        self.config.frame_stack = 4
        self.config.num_actions = 18            # Number of actions in the ALE
        self.config.obs_dims = [4, 84, 84]      # [stack_size, height, width]
        self.config.color_averaging = True

        " Models Parameters "
        self.config.dim_out = [32, 64, 64, 512]
        self.config.filter_dims = [8, 4, 3]
        self.config.strides = [4, 2, 1]
        self.config.gate_fun = tf.nn.relu
        self.config.conv_layers = 3
        self.config.full_layers = 1
        self.config.max_pool = False
        # NCHW doesn't work on CPU in tensorflow, but it's more efficient on a GPU
        self.config.frames_format = experiment_arguments.frame_format
        self.config.norm_factor = 255.0

        " Policies Parameters "
        " Target Policy "
        self.config.target_policy = Config()
        self.config.target_policy.initial_epsilon = experiment_arguments.target_epsilon
        self.config.target_policy.anneal_epsilon = False
        " Behaviour Policy "
        self.config.behaviour_policy = Config()
        self.config.behaviour_policy.initial_epsilon = 0.1
        self.config.behaviour_policy.anneal_epsilon = False

        " QSigma Agent Parameters "
        self.config.sigma_decay = experiment_arguments.sigma_decay
        self.config.sigma = experiment_arguments.sigma
        self.config.n = experiment_arguments.n
        self.config.gamma = 0.99
        self.config.initial_rand_steps = 0
        self.config.use_er_buffer = False

        " Neural Network "
        self.config.alpha = 0.0025

    " Environment "
    self.env = ALE_Environment(games_directory=self.games_directory, summary=self.summary,
                               rom_filename=self.rom_name, config=self.config)

    " Models "
    self.target_network = Model_nCPmFO_wRBFLayer(config=self.config, name='target')

    """ Policies """
    self.target_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=False)
    self.behaviour_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=True)

    """ Neural Network """
    self.function_approximator = SimpleNeuralNetwork(optimizer=self.optimizer,
                                                     neural_network=self.target_network,
                                                     tf_session=self.sess, config=self.config,
                                                     summary=self.summary)

    """ RL Agent """
    self.agent = QSigma(environment=self.env, function_approximator=self.function_approximator,
                        target_policy=self.target_policy, behaviour_policy=self.behaviour_policy,
                        config=self.config, summary=self.summary)

    if experiment_arguments.restore_agent:
        saver = tf.train.Saver()
        sourcepath = os.path.join(dir_name, "agent_graph.ckpt")
        saver.restore(self.sess, sourcepath)
        print("Model restored from file: %s" % sourcepath)
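# Hedged usage note (added commentary; the enclosing class name is not shown in this
# snippet, so `ExperimentAgent` below is an assumption):
#
#   agent = ExperimentAgent(experiment_arguments, dir_name)
#
# With experiment_arguments.restore_agent == True, dir_name must already contain
# experiment_config.p, summary.p, and the agent_graph.ckpt checkpoint files; the run
# then resumes with the pickled Config and summary instead of the defaults above.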
class ExperimentAgent:

    def __init__(self, args):
        """ Agent's Parameters """
        self.n = args.n
        self.sigma = args.sigma
        self.beta = args.beta
        self.alpha = np.float64(args.alpha) / NUMBER_OF_TILINGS

        """ Experiment Configuration """
        self.config = Config()
        self.summary = {}   # self.summary will contain the following keys: return_per_episode, steps_per_episode
        self.config.save_summary = True

        " Environment Parameters "
        self.config.max_actions = 100000
        self.config.num_actions = 3     # Number of actions in Mountain Car
        self.config.obs_dims = [2]      # Dimensions of the observations experienced by the agent

        " TileCoder Parameters "
        self.config.num_tilings = NUMBER_OF_TILINGS
        self.config.tiling_side_length = 8
        self.config.num_dims = 2
        self.config.alpha = self.alpha

        " Policies Parameters "
        self.config.target_policy = Config()
        self.config.target_policy.initial_epsilon = 0.1
        self.config.target_policy.anneal_epsilon = False
        self.config.target_policy.annealing_period = 0
        self.config.target_policy.final_epsilon = 0.1
        self.config.anneal_steps_count = 0

        " QSigma Agent "
        self.config.n = self.n
        self.config.gamma = 1
        self.config.beta = self.beta
        self.config.sigma = self.sigma
        self.config.use_er_buffer = False
        self.config.initial_rand_steps = 0
        self.config.rand_steps_count = 0

        " Environment "
        self.env = MountainCliff(config=self.config, summary=self.summary)

        """ Policies """
        self.target_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=False)

        """ TileCoder """
        self.function_approximator = TileCoderFA(self.config)

        """ RL Agent """
        self.agent = QSigma(function_approximator=self.function_approximator, target_policy=self.target_policy,
                            behaviour_policy=self.target_policy, environment=self.env, config=self.config,
                            summary=self.summary)

    def train(self):
        self.agent.train(num_episodes=1)

    def get_episode_number(self):
        return len(self.summary['steps_per_episode'])

    def save_parameters(self, dir_name):
        txt_file_pathname = os.path.join(dir_name, "agent_parameters.txt")
        with open(txt_file_pathname, "w") as params_txt:
            params_txt.write("# Agent #\n")
            params_txt.write("\tn = " + str(self.agent.n) + "\n")
            params_txt.write("\tgamma = " + str(self.agent.gamma) + "\n")
            params_txt.write("\tsigma = " + str(self.agent.sigma) + "\n")
            params_txt.write("\tbeta = " + str(self.agent.beta) + "\n")
            params_txt.write("\talpha = " + str(self.alpha) + "\n")
            params_txt.write("\n")

    def get_train_data(self):
        return self.summary["return_per_episode"]

    def get_number_of_steps(self):
        return np.sum(self.summary["steps_per_episode"])
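# A minimal driver sketch (an assumption, not part of the original script): the
# argparse flags mirror the attributes read in __init__ above, and NUMBER_OF_TILINGS
# is the module-level constant the class already refers to.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-n", type=int, default=3)
    parser.add_argument("-sigma", type=float, default=0.5)
    parser.add_argument("-beta", type=float, default=1.0)
    parser.add_argument("-alpha", type=float, default=0.5)
    args = parser.parse_args()

    agent = ExperimentAgent(args)
    for _ in range(50):     # each call trains exactly one episode
        agent.train()
    print("episodes:", agent.get_episode_number())
    print("total steps:", agent.get_number_of_steps())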
def setUp(self):
    config = Config()
    homepath = "/home/jfernando/"
    self.games_directory = homepath + "PycharmProjects/RL_Experiments/Experiments_Engine/Environments/" \
                                      "Arcade_Learning_Environment/Supported_Roms/"
    self.rom_name = "seaquest.bin"
    self.summary = {}
    config.save_summary = True

    """ Environment Parameters """
    config.display_screen = False
    config.frame_skip = 4
    config.agent_render = False
    config.repeat_action_probability = 0.25
    config.frame_stack = 4
    config.num_actions = 18                             # Number of actions in the ALE
    config.obs_dims = [config.frame_stack, 84, 84]      # [stack_size, height, width]

    " Models Parameters "
    config.dim_out = [32, 64, 64, 512]
    config.filter_dims = [8, 4, 3]
    config.strides = [4, 2, 1]
    config.gate_fun = tf.nn.relu
    config.conv_layers = 3
    config.full_layers = 1
    config.max_pool = False
    config.frames_format = "NHWC"   # NCHW doesn't work on CPU in tensorflow

    " Policies Parameters "
    " Target Policy "
    config.target_policy = Config()
    config.target_policy.initial_epsilon = 0.1
    config.target_policy.anneal_epsilon = False
    " Behaviour Policy "
    config.behaviour_policy = Config()
    config.behaviour_policy.initial_epsilon = 0.2
    config.behaviour_policy.anneal_epsilon = True
    config.behaviour_policy.final_epsilon = 0.1
    config.behaviour_policy.annealing_period = 100

    " Experience Replay Buffer Parameters "
    config.buff_sz = 100000
    config.batch_sz = 32
    config.env_state_dims = (84, 84)    # Dimensions of a frame
    config.reward_clipping = True

    " QSigma Agent Parameters "
    config.n = 3
    config.gamma = 0.99
    config.beta = 1.0
    config.sigma = 0.5
    config.use_er_buffer = True
    config.initial_rand_steps = 50
    config.rand_steps_count = 0

    " Neural Network "
    config.alpha = 0.00025
    config.tnetwork_update_freq = 10000

    " Agent's Parameters "
    self.n = 3
    self.gamma = 0.99

    " Environment "
    self.env = ALE_Environment(config=config, games_directory=self.games_directory,
                               rom_filename=self.rom_name, summary=self.summary)

    " Models "
    self.target_network = Model_nCPmFO(config=config, name="target")
    self.update_network = Model_nCPmFO(config=config, name="update")

    """ Target Policy """
    self.target_policy = EpsilonGreedyPolicy(config, behaviour_policy=False)

    """ Behaviour Policy """
    self.behavior_policy = EpsilonGreedyPolicy(config, behaviour_policy=True)

    """ Return Function """
    return_function = OffPolicyQSigmaReturnFunction(config=config, tpolicy=self.target_policy)

    """ Experience Replay Buffer """
    er_buffer = OffPolicyQSigmaExperienceReplayBuffer(config, return_function=return_function)

    """ Neural Network """
    optimizer = lambda lr: tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.95, epsilon=0.01,
                                                     momentum=0.95)
    tf_sess = tf.Session()
    self.function_approximator = NeuralNetwork_wER_FA(optimizer=optimizer, target_network=self.target_network,
                                                      update_network=self.update_network, er_buffer=er_buffer,
                                                      tf_session=tf_sess, config=config, summary=self.summary)

    """ RL Agent """
    self.agent = QSigma(environment=self.env, function_approximator=self.function_approximator,
                        target_policy=self.target_policy, behaviour_policy=self.behavior_policy,
                        er_buffer=er_buffer, config=config, summary=self.summary)

    davariables = self.target_network.get_variables_as_list(tf_session=tf_sess)
    total_parameters = 0
    for davar in davariables:
        total_parameters += np.array(davar).size
    print("The total number of parameters in the network is:", total_parameters)
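# A minimal companion test sketch (an assumption: this setUp lives in a
# unittest.TestCase subclass, and the agent fills summary['return_per_episode']
# the way the sibling experiment scripts do).
def test_train_one_episode(self):
    self.agent.train(num_episodes=1)
    self.assertIn('return_per_episode', self.summary)
    self.assertEqual(len(self.summary['return_per_episode']), 1)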
class ExperimentAgent:

    def __init__(self, experiment_parameters, restore=False, restore_data_dir=""):
        self.tf_sess = tf.Session()
        self.optimizer = lambda lr: tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.95, epsilon=0.01,
                                                              momentum=0.95, centered=True)

        """ Agent's Parameters """
        self.n = experiment_parameters["n"]
        self.sigma = experiment_parameters["sigma"]
        ###### sigma decay parameters ######
        self.beta = experiment_parameters["beta"]
        self.decay_type = experiment_parameters["decay_type"]
        self.decay_freq = experiment_parameters["decay_freq"]
        self.sigma_min = experiment_parameters['sigma_min']
        ####################################
        self.target_epsilon = experiment_parameters['target_epsilon']
        self.compute_bprobabilities = experiment_parameters['compute_bprobabilities']
        self.anneal_epsilon = experiment_parameters['anneal_epsilon']
        self.store_sigma = experiment_parameters['store_sigma']
        self.tnetwork_update_freq = experiment_parameters['tnetwork_update_freq']

        if restore:
            with open(os.path.join(restore_data_dir, 'experiment_config.p'), mode='rb') as experiment_config_file:
                self.config = pickle.load(experiment_config_file)
            with open(os.path.join(restore_data_dir, "summary.p"), mode='rb') as summary_file:
                self.summary = pickle.load(summary_file)
        else:
            """ Experiment Configuration """
            self.config = Config()
            self.summary = {}
            self.config.save_summary = True

            " Environment Parameters "
            self.config.max_actions = 5000
            self.config.num_actions = 3     # Number of actions in Mountain Car
            self.config.obs_dims = [2]      # Dimensions of the observations experienced by the agent

            " Model Parameters "
            self.config.dim_out = [1000]
            self.config.gate_fun = tf.nn.relu
            self.config.full_layers = 1

            " Neural Network Parameters "
            self.config.alpha = 0.00025
            self.config.batch_sz = 32
            self.config.tnetwork_update_freq = self.tnetwork_update_freq    # Default: 0.05 * buff_sz = 1,000

            " Experience Replay Buffer Parameters "
            self.config.buff_sz = 20000
            self.config.env_state_dims = [2]    # Dimensions of the environment's states
            self.config.obs_dtype = np.float32
            ###### sigma decay parameters ######
            self.config.sigma_decay = self.beta
            self.config.decay_type = self.decay_type
            self.config.decay_freq = self.decay_freq
            self.config.sigma_min = self.sigma_min
            ####################################
            self.config.sigma = self.sigma
            self.config.store_bprobs = not self.compute_bprobabilities
            self.config.store_sigma = self.store_sigma
            self.config.store_return = not self.anneal_epsilon

            " Policies Parameters "
            self.config.target_policy = Config()
            self.config.target_policy.initial_epsilon = self.target_epsilon
            self.config.target_policy.anneal_epsilon = False
            self.config.behaviour_policy = Config()
            if self.anneal_epsilon:
                self.config.behaviour_policy.initial_epsilon = 1
                self.config.behaviour_policy.final_epsilon = 0.1
                self.config.behaviour_policy.anneal_epsilon = True
                self.config.behaviour_policy.annealing_period = 20000   # Buffer size
            else:
                self.config.behaviour_policy.initial_epsilon = 0.1
                self.config.behaviour_policy.anneal_epsilon = False
                self.config.behaviour_policy.annealing_period = 20000   # Buffer size

            " QSigma Agent "
            self.config.n = self.n
            self.config.gamma = 1
            self.config.use_er_buffer = True
            self.config.initial_rand_steps = 1000   # 0.05 * buffer_size

            " QSigma Return Function "
            self.config.compute_bprobs = self.compute_bprobabilities
            self.config.onpolicy = self.compute_bprobabilities and not self.anneal_epsilon
            self.config.rand_steps_count = 0

        " Environment "
        self.env = MountainCar(config=self.config, summary=self.summary)

        " Models "
        self.tnetwork = Model_mFO(config=self.config, name='target')
        self.unetwork = Model_mFO(config=self.config, name='update')

        """ Policies """
        self.target_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=False)
        self.behaviour_policy = EpsilonGreedyPolicy(self.config, behaviour_policy=True)

        """ QSigma Return Function """
        self.rl_return_fun = QSigmaReturnFunction(config=self.config, tpolicy=self.target_policy,
                                                  bpolicy=self.behaviour_policy)

        """ QSigma Replay Buffer """
        self.qsigma_erp = QSigmaExperienceReplayBuffer(config=self.config, return_function=self.rl_return_fun)

        """ Neural Network """
        self.function_approximator = NeuralNetwork_wER_FA(optimizer=self.optimizer, target_network=self.tnetwork,
                                                          update_network=self.unetwork, er_buffer=self.qsigma_erp,
                                                          tf_session=self.tf_sess, config=self.config,
                                                          summary=self.summary)

        """ RL Agent """
        self.agent = QSigma(function_approximator=self.function_approximator, target_policy=self.target_policy,
                            behaviour_policy=self.behaviour_policy, environment=self.env,
                            er_buffer=self.qsigma_erp, config=self.config, summary=self.summary)

        # number_of_parameters = 0
        # for variable in self.tnetwork.get_variables_as_list(self.tf_sess):
        #     number_of_parameters += np.array(variable).flatten().size
        # print("The number of parameters in the network is:", number_of_parameters)  # Answer: 6003

        if restore:
            saver = tf.train.Saver()
            sourcepath = os.path.join(restore_data_dir, "agent_graph.ckpt")
            saver.restore(self.tf_sess, sourcepath)
            print("Model restored from file: %s" % sourcepath)

    def train(self):
        self.agent.train(num_episodes=1)
        self.function_approximator.store_in_summary()

    def get_number_of_frames(self):
        return np.sum(self.summary['steps_per_episode'])

    def get_episode_number(self):
        return len(self.summary['steps_per_episode'])

    def get_train_data(self):
        return_per_episode = self.summary['return_per_episode']
        nn_loss = self.summary['cumulative_loss']
        return return_per_episode, nn_loss

    def save_agent(self, dir_name):
        with open(os.path.join(dir_name, 'experiment_config.p'), mode='wb') as experiment_config_file:
            pickle.dump(self.config, experiment_config_file)
        with open(os.path.join(dir_name, "summary.p"), mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        saver = tf.train.Saver()
        save_path = saver.save(self.tf_sess, os.path.join(dir_name, "agent_graph.ckpt"))
        print("Model saved in file: %s" % save_path)

    def save_results(self, dir_name):
        env_info = np.cumsum(self.summary['steps_per_episode'])
        return_per_episode = self.summary['return_per_episode']
        total_loss_per_episode = self.summary['cumulative_loss']
        results = {'return_per_episode': return_per_episode, 'env_info': env_info,
                   'total_loss_per_episode': total_loss_per_episode}
        with open(os.path.join(dir_name, 'results.p'), mode="wb") as results_file:
            pickle.dump(results, results_file)

    def save_parameters(self, dir_name):
        txt_file_pathname = os.path.join(dir_name, "agent_parameters.txt")
        with open(txt_file_pathname, "w") as params_txt:
            params_txt.write("# Agent #\n")
            params_txt.write("\tn = " + str(self.config.n) + "\n")
            params_txt.write("\tgamma = " + str(self.config.gamma) + "\n")
            params_txt.write("\tsigma = " + str(self.config.sigma) + "\n")
            params_txt.write("\tbeta = " + str(self.config.sigma_decay) + "\n")
            params_txt.write("\trandom steps before training = " + str(self.config.initial_rand_steps) + "\n")
            params_txt.write("\tcompute behaviour policy's probabilities = " + str(self.config.compute_bprobs) + "\n")
            params_txt.write("\n")

            assert isinstance(self.target_policy, EpsilonGreedyPolicy)
            params_txt.write("# Target Policy #\n")
            params_txt.write("\tinitial epsilon = " + str(self.config.target_policy.initial_epsilon) + "\n")
            params_txt.write("\tfinal epsilon = " + str(self.config.target_policy.final_epsilon) + "\n")
            params_txt.write("\n")

            assert isinstance(self.behaviour_policy, EpsilonGreedyPolicy)
            params_txt.write("# Behaviour Policy #\n")
            params_txt.write("\tinitial epsilon = " + str(self.config.behaviour_policy.initial_epsilon) + "\n")
            params_txt.write("\tanneal epsilon = " + str(self.config.behaviour_policy.anneal_epsilon) + "\n")
            params_txt.write("\tfinal epsilon = " + str(self.config.behaviour_policy.final_epsilon) + "\n")
            params_txt.write("\tannealing period = " + str(self.config.behaviour_policy.annealing_period) + "\n")
            params_txt.write("\n")

            params_txt.write("# Function Approximator: Neural Network with Experience Replay #\n")
            params_txt.write("\talpha = " + str(self.config.alpha) + "\n")
            params_txt.write("\ttarget network update frequency = " + str(self.config.tnetwork_update_freq) + "\n")
            params_txt.write("\tbatch size = " + str(self.config.batch_sz) + "\n")
            params_txt.write("\tbuffer size = " + str(self.config.buff_sz) + "\n")
            params_txt.write("\tfully connected layers = " + str(self.config.full_layers) + "\n")
            params_txt.write("\toutput dimensions per layer = " + str(self.config.dim_out) + "\n")
            params_txt.write("\tgate function = " + str(self.config.gate_fun) + "\n")
            params_txt.write("\n")
            params_txt.write("\tstore_sigma = " + str(self.store_sigma))