def overfit_small_data(plot=False):
    print_formatted('Overfitting small data', 'stage')
    num_train = 50
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    weight_scale = 3e-2
    learning_rate = 1e-3
    update_rule = 'adam'
    model = FullyConnectedNet(input_dim=3072, hidden_dims=[100, 100],
                              num_classes=10, weight_scale=weight_scale)
    solver = Solver(model, small_data, update_rule=update_rule,
                    optim_config={'learning_rate': learning_rate},
                    lr_decay=0.95, num_epochs=20, batch_size=25, print_every=10)
    solver.train()
    if plot:
        plot_stats('loss', solvers={'fc_net': solver},
                   filename='overfitting_loss_history.png')

def conv_net_overfitting(plot=False):
    print_formatted('Overfitting small data with convnet', 'stage')
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    small_data['X_train'] = small_data['X_train'].reshape(
        (small_data['X_train'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
    small_data['X_val'] = small_data['X_val'].reshape(
        (small_data['X_val'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
    model = ThreeLayerConvNet(weight_scale=1e-2)
    solver = Solver(model, small_data, num_epochs=15, batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    print_every=1)
    solver.train()
    if plot:
        plot_stats('loss', 'train_val_acc', solvers={'convnet': solver},
                   filename='convnet_overfitting.png')

def train_with_layernorm(plot=False):
    print_formatted('Layer normalization', 'stage')
    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 2e-2
    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    print_formatted('without layernorm', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072, hidden_dims=hidden_dims,
                              num_classes=10, weight_scale=weight_scale)
    solver = Solver(model, small_data, update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    num_epochs=10, batch_size=50, print_every=20)
    solver.train()
    print()

    print_formatted('with layernorm', 'bold', 'blue')
    ln_model = FullyConnectedNet(input_dim=3072, hidden_dims=hidden_dims,
                                 num_classes=10, weight_scale=weight_scale,
                                 normalization='layernorm')
    ln_solver = Solver(ln_model, small_data, update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       num_epochs=10, batch_size=50, print_every=20)
    ln_solver.train()

    if plot:
        plot_stats('loss', 'train_acc', 'val_acc',
                   solvers={'baseline': solver, 'with_norm': ln_solver},
                   filename='layernorm.png')

def compare_update_rules(plot=False):
    print_formatted('Update rules', 'stage')
    num_train = 4000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    learning_rates = {
        'sgd': 1e-2,
        'sgd_momentum': 1e-2,
        'nesterov_momentum': 1e-2,
        'adagrad': 1e-4,
        'rmsprop': 1e-4,
        'adam': 1e-3
    }
    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum', 'nesterov_momentum',
                        'adagrad', 'rmsprop', 'adam']:
        print_formatted('running with ' + update_rule, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072, hidden_dims=[100] * 5,
                                  num_classes=10, weight_scale=5e-2)
        solver = Solver(model, small_data, num_epochs=5, batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': learning_rates[update_rule]},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()
    if plot:
        plot_stats('loss', 'train_acc', 'val_acc', solvers=solvers,
                   filename='update_rules_comparison.png')

def train_with_dropout(plot=False):
    print_formatted('Dropout', 'stage')
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    solvers = {}
    dropout_choices = [1, 0.25]
    for dropout in dropout_choices:
        if dropout == 1:
            print_formatted('without dropout, p = 1', 'bold', 'blue')
        else:
            print_formatted('with dropout, p = %.2f' % dropout, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072, hidden_dims=[500],
                                  num_classes=10, dropout=dropout)
        solver = Solver(model, small_data, update_rule='adam',
                        optim_config={'learning_rate': 5e-4},
                        num_epochs=25, batch_size=100, print_every=100)
        solver.train()
        solvers[dropout] = solver
        if dropout == 1:
            print()
    if plot:
        plot_stats('train_acc', 'val_acc',
                   solvers={'1.00 dropout': solvers[1],
                            '0.25 dropout': solvers[0.25]},
                   filename='dropout.png')

def test(self, env, render=True):
    obs, done, ep_reward = env.reset(), False, 0
    stats = []
    action = [0]
    while not done:
        stats.append({'t': env.task.t,
                      'q': obs[0],
                      'q_ref': env.task.get_q_ref(),
                      'a1': obs[1],
                      'u': action[0]})
        action, _ = self.model.action_value(obs[None, :])
        obs, reward, done = env.step(action[0])
        ep_reward += reward
    if render:
        df = pd.DataFrame(stats)
        plot_stats(df)
    return ep_reward

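# Note: plot_stats is not defined in any of these snippets. In the RL snippets
# (this test() method and sarsa() further below) it is called with a pandas
# DataFrame of logged time histories ('t', 'q', 'q_ref', 'a1', 'u'). The helper
# below is only a minimal sketch of what such a function might look like; the
# name plot_stats_df_sketch and the subplot layout are assumptions, not the
# original implementation.
import matplotlib.pyplot as plt
import pandas as pd


def plot_stats_df_sketch(df: pd.DataFrame):
    """Plot the tracked state against its reference, plus the control input."""
    fig, (ax_q, ax_u) = plt.subplots(2, 1, sharex=True)
    ax_q.plot(df['t'], df['q'], label='q')
    ax_q.plot(df['t'], df['q_ref'], '--', label='q_ref')
    ax_q.legend()
    ax_u.plot(df['t'], df['u'], label='u')
    ax_u.set_xlabel('t [s]')
    ax_u.legend()
    plt.show()
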
def train_best_fc_model(plot=False):
    print_formatted('Best fully connected net', 'stage')
    hidden_dims = [100, 100, 100]
    weight_scale = 2e-2
    num_epochs = 10
    dropout = 1
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
    }

    print_formatted('training', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072, hidden_dims=hidden_dims,
                              num_classes=10, weight_scale=weight_scale,
                              normalization='batchnorm', dropout=dropout)
    solver = Solver(model, data, update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    num_epochs=num_epochs, batch_size=50, print_every=100)
    solver.train()
    print()
    if plot:
        plot_stats('loss', 'train_val_acc', solvers={'best_fc': solver})

    print_formatted('evaluating', 'bold', 'blue')
    y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
    y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
    print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
    print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

def train_two_layer(plot=False):
    print_formatted('Two layer net', 'stage')
    model = TwoLayerNet(input_dim=3072, hidden_dim=100, num_classes=10)
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    solver = Solver(model, data, num_epochs=1, print_every=100,
                    batch_size=100, lr_decay=0.95)
    solver.train()
    if plot:
        plot_stats('loss', 'train_val_acc', solvers={'two_layer_net': solver},
                   filename='two_layer_net_stats.png')

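# Note: the Solver-based snippets above call plot_stats(*stat_names, solvers=...,
# filename=...) without defining it. The function below is a minimal sketch of a
# compatible helper, assuming each Solver exposes loss_history, train_acc_history
# and val_acc_history (as in the CS231n-style Solver); the name
# plot_solver_stats_sketch and the figure layout are assumptions.
import matplotlib.pyplot as plt


def plot_solver_stats_sketch(*stat_names, solvers=None, filename=None):
    solvers = solvers or {}
    fig, axes = plt.subplots(1, len(stat_names), figsize=(5 * len(stat_names), 4))
    if len(stat_names) == 1:
        axes = [axes]
    for ax, stat in zip(axes, stat_names):
        for name, solver in solvers.items():
            if stat == 'loss':
                ax.plot(solver.loss_history, 'o', markersize=2, label=name)
            if stat in ('train_acc', 'train_val_acc'):
                ax.plot(solver.train_acc_history, '-o', label=name + ' (train)')
            if stat in ('val_acc', 'train_val_acc'):
                ax.plot(solver.val_acc_history, '-o', label=name + ' (val)')
        ax.set_title(stat)
        ax.set_xlabel('iteration' if stat == 'loss' else 'epoch')
        ax.legend()
    if filename is not None:
        fig.savefig(filename)
    plt.show()
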
def evolve(M, H, plot=False):
    env = simpy.Environment()
    env.persons = []
    env.obs_pop = {'F': [], 'M': []}
    env.new_persons = []
    env.wanting = {'F': [], 'M': []}
    env.borns = []
    env.deaths = []
    env.average_age = []
    env.couples = []
    for i in range(M):
        w = Woman(i)
        env.persons.append(w)
        w.age = rnd.randint(0, 1200)
    j = len(env.persons)
    for i in range(H):
        m = Man(i + j)
        env.persons.append(m)
        m.age = rnd.randint(0, 1200)
    env.idx = M + H
    env.process(live_generator(env))
    env.run(G.max_time)
    tot_pop = utils.__elem_sum__(env.obs_pop['F'], env.obs_pop['M'])
    if plot:
        env.average_age = [
            elem / (tot_pop[i] * 12) for i, elem in enumerate(env.average_age)
        ]
        plotting.plot_stats(env)
    return tot_pop

def sarsa(env,
          num_episodes,
          discount_factor=1.0,
          alpha=0.5,
          epsilon=0.1,
          max_episode_length=20,
          start_Q=None):
    """
    SARSA algorithm: on-policy TD control, finds the optimal epsilon-greedy policy.

    :param env: environment exposing reset(), step(action), dt, state and the
        discretized q_space, qe_space, a1_space and action_space
    :param num_episodes: number of episodes to run
    :param discount_factor: discount factor for the TD target
    :param alpha: TD learning rate
    :param epsilon: exploration probability of the epsilon-greedy policy
    :param max_episode_length: episode length cap
    :param start_Q: optional pre-trained action-value array to continue from
    :return: tuple (Q, stats, intermediate_stats)
    """
    # The (final) action-value function
    if start_Q is not None:
        Q = start_Q
    else:
        Q = np.zeros((len(env.q_space), len(env.qe_space),
                      len(env.a1_space), len(env.action_space)))

    # Episode statistics
    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes))

    # Policy to follow:
    policy = make_epsilon_greedy_policy(Q, epsilon, len(env.action_space))

    # Run through the episodes
    for i_episode in range(num_episodes):
        if (i_episode + 1) % 100 == 0:
            print("\rEpisode {}/{}".format(i_episode + 1, num_episodes), end="")
            sys.stdout.flush()

        state = env.reset()
        action_probs = policy(state)
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        intermediate_stats = []

        for step in itertools.count():
            t = step * env.dt
            q_ref = 10 * np.pi / 180 * np.sin(t * 2 * np.pi / 5)

            # Perform action:
            next_state, reward, done, _ = env.step(action)

            # Based on results, pick the next action:
            next_action_probs = policy(next_state)
            next_action = np.random.choice(np.arange(len(next_action_probs)),
                                           p=next_action_probs)

            # Update statistics from reward etc.
            stats.episode_lengths[i_episode] = t
            stats.episode_rewards[i_episode] += reward

            # TD update:
            td_target = reward + discount_factor * Q[next_state][next_action]
            td_delta = td_target - Q[state][action]
            Q[state][action] += alpha * td_delta

            if (i_episode + 1) % 1000 == 0:
                intermediate_stats.append({
                    't': t,
                    'q_ref': q_ref,
                    'u': env.action_space[action],
                    'q': env.state[0],
                    'a1': env.state[1]
                })

            if done or t >= (max_episode_length / env.dt):
                break

            state = next_state
            action = next_action

        if len(intermediate_stats) > 0:
            df = pd.DataFrame(intermediate_stats)
            plotting.plot_stats(df, env, str(i_episode + 1))

    return Q, stats, intermediate_stats

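# Note: make_epsilon_greedy_policy is used by sarsa() above but not shown here.
# A conventional implementation matching its usage (policy(state) returns a
# probability vector over the discrete actions) is sketched below; treat it as
# an illustrative assumption rather than the original code.
import numpy as np


def make_epsilon_greedy_policy(Q, epsilon, num_actions):
    """Return a function mapping a state index to epsilon-greedy action probabilities."""
    def policy_fn(state):
        probs = np.ones(num_actions, dtype=float) * epsilon / num_actions
        best_action = np.argmax(Q[state])
        probs[best_action] += 1.0 - epsilon
        return probs
    return policy_fn
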
# set task environments with fixed hidden states within each block
task = RevLearn(O, S, D, blocks=blocks, T=T)
task.set_hidden_states()

for j, q in enumerate(['IRI', 'RRI']):
    for k, mean in enumerate(np.arange(10, 31)):
        correct_choices, responses, hidden_states = \
            generate_behavior(q, task, mean, d, state_transition_matrix)

        performance[i, j, k] = correct_choices.mean(axis=-1)
        choice_prob[i, j, k] = get_choice_probability(correct_choices,
                                                      hidden_states[:, :, 1] == 0)

# plot stats
fig1 = plot_stats(performance, choice_prob)
fig1.savefig('Fig5.pdf', bbox_inches='tight', transparent=True)
fig1.savefig('Fig5.png', bbox_inches='tight', transparent=True, dpi=600)

###############################################################################
####################run simulations for all agents#############################
labels = np.array(['IRI', 'RRI', 'SU', 'DU'])

performance = np.zeros((2, len(labels), blocks))
choice_prob = np.zeros((2, len(labels), 21))
for i, s in enumerate(['irregular', 'semi-regular']):
    mu = 20
    if s == 'irregular':
        sigma = mu * (mu - 1)

# experiment_ids = ['CartPole_none_m0_lr025', 'CartPole_single_m0_lr025', 'CartPole_per-layer_m0_lr025', 'CartPole_per-weight_m0_lr025']
# experiment_ids = ['MNIST_none_no_rt', 'MNIST_single_no_rt', 'MNIST_pl_no_rt', 'MNIST_pw_no_rt']
# experiment_ids = ['CartPole_none_no_rt', 'CartPole_single_no_rt', 'CartPole_per-layer_no_rt', 'CartPole_per-weight_no_rt']
# experiment_ids = ['MNIST_none_mx300', 'MNIST_single_mx300', 'MNIST_pl_mx300', 'MNIST_pw_mx300']
# experiment_ids = ['Seaquest_none_m0_lr025', 'Seaquest_single_m0_lr025', 'Seaquest_pl_m0_lr025', 'Seaquest_pw_m0_lr025']
this_file_dir_local1 = os.path.dirname(os.path.abspath(__file__))
package_root_this_file1 = fs.get_parent(this_file_dir_local1, 'es-rl')
d1 = os.path.join(package_root_this_file1, 'experiments', 'checkpoints')
experiment_ids = os.listdir(d1)

for experiment_id in experiment_ids:
    this_file_dir_local = os.path.dirname(os.path.abspath(__file__))
    package_root_this_file = fs.get_parent(this_file_dir_local, 'es-rl')
    d = os.path.join(package_root_this_file, 'experiments', 'checkpoints',
                     experiment_id)
    directories = [
        os.path.join(d, di) for di in os.listdir(d)
        if os.path.isdir(os.path.join(d, di))
    ]
    directories = [
        d for d in directories if 'monitoring' not in d and 'analysis' not in d
    ]
    # Create result directory
    dst_dir = '/home/lorenzo/MEGA/UNI/MSc/Master\ Thesis/repo/graphics' + experiment_id + '-analysis'
    result_dir = os.path.join(d, experiment_id + '-analysis')
    for dirs in directories:
        plot.plot_stats(dirs + '/stats.csv', dirs)

def train(mode: str,
          env_params: dict,
          ac_params: dict,
          rls_params: dict,
          pid_params: dict,
          results_path: str,
          seed=0,
          return_logs=True,
          save_logs=False,
          save_weights=False,
          weight_save_interval: int = 10,
          save_agents=False,
          load_agents=False,
          agents_path="",
          plot_states=True,
          plot_nn_weights=False,
          plot_rls=False):
    """
    Trains the integrated IDHP agent in the 6DOF environment for a single episode.

    :param mode: str indicating what task the agent should perform: train, test_1, or test_2
    :param env_params: dict, relevant parameters for environment setup
    :param ac_params: dict, relevant parameters for actor-critic setup
    :param rls_params: dict, relevant parameters for RLS estimator setup
    :param pid_params: relevant parameters for PID setup
    :param results_path: Save path for the training logs
    :param seed: Random seed for initialization
    :param return_logs: Return the logs as function output?
    :param save_logs: Save the logs to file?
    :param save_weights: Save the weights in the logger? Useful for debugging
    :param weight_save_interval: Number of timesteps between saving the neural network weights in the logger
    :param save_agents: Save the trained agents to file after training?
    :param load_agents: Load pre-trained agents from file before starting the tasks?
    :param agents_path: Save or load path for trained agents.
    :param plot_states: Plot the states?
    :param plot_nn_weights: Plot neural network weights after training? (Warning: takes a while)
    :param plot_rls: Plot the RLS estimator gradients after training?
    :return: Can return various tuples, depending on the settings above
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Environment
    env = Helicopter6DOF(dt=env_params['dt'], t_max=env_params['t_max'])
    trim_state, trim_actions = env.trim(
        trim_speed=env_params['initial_velocity'],
        flight_path_angle=env_params['initial_flight_path_angle'],
        altitude=env_params['initial_altitude'])
    observation = trim_state.copy()
    ref = trim_state.copy()
    ref_generator = RefGenerator(T=10, dt=env_params["dt"], A=10, u_ref=0,
                                 t_switch=60, filter_tau=2.5)

    # Logging
    logger = Logger(params=ac_params)

    # Agents:
    agent_col = DHPAgent(**ac_params['col'])
    agent_lon = DHPAgent(**ac_params['lon'])
    if load_agents:
        agent_col.load(agents_path + "col.pt")
        agent_lon.load(agents_path + "lon.pt")
        with open(agents_path + "rls.pkl", 'rb') as f:
            rls_estimator = pickle.load(f)
    else:
        # incremental RLS estimator
        rls_estimator = RecursiveLeastSquares(**rls_params)
    agents = [agent_col, agent_lon]

    # Create controllers
    lateral_pid = LatPedPID(phi_trim=trim_state[6],
                            lat_trim=trim_actions[2],
                            pedal_trim=trim_actions[3],
                            dt=env_params["dt"],
                            gains_dict=pid_params)
    collective_pid = CollectivePID6DOF(col_trim=trim_actions[0],
                                       h_ref=env_params['initial_altitude'],
                                       dt=env_params['dt'],
                                       proportional_gain=pid_params['Kh'])

    # Excitation signal for the RLS estimator
    excitation = np.load('excitation.npy')

    # Flags
    excitation_phase = False if load_agents else True
    update_col = True if load_agents else False
    update_lon = True
    success = True
    rewards = np.zeros(2)

    def update_agent(n):
        """
        Shorthand to update a single numbered agent after a single transition.
        :param n: Index of agent to update, per list 'agents' (0=col, 1=lon)
        """
        rewards[n], dr_ds = agents[n].get_reward(next_observation, ref)
        F, G = agents[n].get_transition_matrices(rls_estimator)
        agents[n].update_networks(observation, next_observation, ref, next_ref,
                                  dr_ds, F, G)

    # Main loop
    for step in itertools.count():
        lateral_cyclic, pedal = lateral_pid(observation)

        # TODO: It would be much nicer if reference generation were handled internally by the environment
        if mode == "train":
            if step == 0:
                ref_generator.set_task(task="train_lon", t=0, obs=observation,
                                       velocity_filter_target=0)
                ref = ref_generator.get_ref(observation, env.t)
            elif step == 1000:
                excitation_phase = False
            elif step == env_params['step_switch']:
                agent_lon.learning_rate_actor *= 0.1
                agent_lon.learning_rate_critic *= 0.1
                update_col = True
                ref_generator.set_task("train_col", t=env.t, obs=observation,
                                       z_start=observation[11])

            # Get ref, action, take action
            if step < env_params['step_switch']:
                actions = np.array([
                    collective_pid(observation),
                    trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                    lateral_cyclic,
                    pedal
                ])
            else:
                actions = np.array([
                    trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                    trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                    lateral_cyclic,
                    pedal
                ])
        elif mode == "test_1":
            if step == 0:
                ref_generator.set_task(task="hover", t=0, obs=observation)
                ref = ref_generator.get_ref(observation, env.t)
            elif step == 500:
                ref_generator.set_task("velocity", t=env.t, obs=observation,
                                       z_start=0,
                                       velocity_filter_target=25 - observation[0])
            elif step == 2000:
                ref_generator.set_task("velocity", t=env.t, obs=observation,
                                       z_start=0,
                                       velocity_filter_target=0 - observation[0])
            actions = np.array([
                trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                lateral_cyclic,
                pedal
            ])
        elif mode == "test_2":
            if step == 0:
                ref_generator.set_task(task="descent", t=0, t_switch=0, obs=observation)
                ref = ref_generator.get_ref(observation, env.t)
            elif step == 1000:
                env.set_engine_status(n_engines_available=1, transient=True)
            actions = np.array([
                trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                lateral_cyclic,
                pedal
            ])
        else:
            raise NotImplementedError("Training mode unknown.")

        if excitation_phase:
            actions += excitation[step]

        actions = np.clip(actions, 0, 1)

        # Take step in the environment
        next_observation, _, done = env.step(actions)
        if env.t < 20:
            ref_generator.A = 10
        elif 20 <= env.t < 40:
            ref_generator.A = 15
        else:
            ref_generator.A = 20
        next_ref = ref_generator.get_ref(observation, env.t)

        # Update RLS estimator
        rls_estimator.update(observation, actions[:2], next_observation)

        # Collective:
        if update_col:
            update_agent(0)
        else:
            rewards[0] = 0

        # Cyclic:
        if update_lon:
            update_agent(1)
        else:
            rewards[1] = 0

        logger.log_states(env.t, observation, ref, actions, rewards,
                          env.P_available, env.P_out)
        if save_weights and (step % weight_save_interval == 0):
            logger.log_weights(env.t, agents, rls_estimator)

        if envelope_limits_reached(observation)[0]:
            print("Safe envelope limits reached, stopping simulation. Seed: " + str(seed))
            print("Cause of violation: " + envelope_limits_reached(observation)[1])
            success = False
            done = True

        if np.isnan(actions).any():
            print("NaN encountered in actions at timestep", step, " -- ", actions,
                  "Seed: " + str(seed))
            success = False
            done = True

        if done or (mode == "test_2" and observation[11] > 0):
            break

        # Next step..
        observation = next_observation
        ref = next_ref
        step += 1

    # print("Training time: ", time.time()-t_start)
    logger.finalize()
    if save_logs:
        if not os.path.exists(results_path):
            os.mkdir(results_path)
        logger.save(path=results_path + "log.pkl")

    if save_agents:
        if not os.path.exists(agents_path):
            os.mkdir(agents_path)
        agent_col.save(path=agents_path + "col.pt")
        agent_lon.save(path=agents_path + "lon.pt")
        rls_estimator.save(path=agents_path + "rls.pkl")

    # Visualization
    if plot_states:
        plot_stats(logger)

    if plot_nn_weights and save_weights:
        plot_neural_network_weights(logger, figsize=(8, 6), agent_name='col',
                                    title='Collective')
        plot_neural_network_weights(logger, figsize=(8, 6), agent_name='lon',
                                    title='Longitudinal Cyclic')
    elif plot_nn_weights and not save_weights:
        print("Called plot_nn_weights but no weights were saved (save_weights=False), skipping.")

    if plot_rls and save_weights:
        plot_rls_weights(logger)
    elif plot_rls and not save_weights:
        print("Called plot_rls_weights but no weights were saved (save_weights=False), skipping.")

    score = np.sqrt(-logger.state_history.iloc[5000:6000]['r2'].sum() / 1000)

    if return_logs:
        return logger, score
    else:
        if success:
            return 1, score
        else:
            return 0, 0