Example No. 1
def overfit_small_data(plot=False):
    print_formatted('Overfitting small data', 'stage')

    num_train = 50
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    weight_scale = 3e-2
    learning_rate = 1e-3
    update_rule = 'adam'

    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=[100, 100],
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': learning_rate},
                    lr_decay=0.95,
                    num_epochs=20,
                    batch_size=25,
                    print_every=10)
    solver.train()

    if plot:
        plot_stats('loss',
                   solvers={'fc_net': solver},
                   filename='overfitting_loss_history.png')
Example No. 2
def conv_net_overfitting(plot=False):
    print_formatted('Overfitting small data with convnet', 'stage')

    np.random.seed(231)

    num_train = 100
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    # Reshape the flat (N, 3072) rows into (N, 32, 32, 3) images, then transpose
    # to channels-first (N, 3, 32, 32) as expected by the convnet
    small_data['X_train'] = small_data['X_train'].reshape(
        (small_data['X_train'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
    small_data['X_val'] = small_data['X_val'].reshape(
        (small_data['X_val'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    model = ThreeLayerConvNet(weight_scale=1e-2)

    solver = Solver(model,
                    small_data,
                    num_epochs=15,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    print_every=1)
    solver.train()

    if plot:
        plot_stats('loss',
                   'train_val_acc',
                   solvers={'convnet': solver},
                   filename='convnet_overfitting.png')
Example No. 3
def train_with_layernorm(plot=False):
    print_formatted('Layer normalization', 'stage')

    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 2e-2

    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    print_formatted('without layernorm', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=10,
                    batch_size=50,
                    print_every=20)
    solver.train()
    print()

    print_formatted('with layernorm', 'bold', 'blue')
    ln_model = FullyConnectedNet(input_dim=3072,
                                 hidden_dims=hidden_dims,
                                 num_classes=10,
                                 weight_scale=weight_scale,
                                 normalization='layernorm')
    ln_solver = Solver(ln_model,
                       small_data,
                       update_rule='adam',
                       optim_config={
                           'learning_rate': 1e-3,
                       },
                       num_epochs=10,
                       batch_size=50,
                       print_every=20)
    ln_solver.train()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers={
                       'baseline': solver,
                       'with_norm': ln_solver
                   },
                   filename='layernorm.png')
Example No. 4
def compare_update_rules(plot=False):
    print_formatted('Update rules', 'stage')

    num_train = 4000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    learning_rates = {
        'sgd': 1e-2,
        'sgd_momentum': 1e-2,
        'nesterov_momentum': 1e-2,
        'adagrad': 1e-4,
        'rmsprop': 1e-4,
        'adam': 1e-3
    }
    solvers = {}

    for update_rule in [
            'sgd', 'sgd_momentum', 'nesterov_momentum', 'adagrad', 'rmsprop',
            'adam'
    ]:
        print_formatted('running with ' + update_rule, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[100] * 5,
                                  num_classes=10,
                                  weight_scale=5e-2)

        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={
                            'learning_rate': learning_rates[update_rule],
                        },
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers=solvers,
                   filename='update_rules_comparison.png')
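The rules compared above are looked up by name inside the Solver. As a rough sketch of what one of them could look like under the cs231n-style convention these examples appear to follow (an update rule maps the parameter array, its gradient, and a config dict to the updated parameters plus config), 'sgd_momentum' might be implemented along these lines; this is an assumption for illustration, not code from the examples:

import numpy as np

# Illustrative sketch (assumed convention, not part of the examples above):
# an update rule takes (w, dw, config) and returns (next_w, config), with the
# config dict carrying per-parameter state such as the momentum velocity.
def sgd_momentum(w, dw, config=None):
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))

    v = config['momentum'] * v - config['learning_rate'] * dw  # velocity update
    next_w = w + v                                             # step along the velocity
    config['velocity'] = v
    return next_w, config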
Example No. 5
def train_with_dropout(plot=False):
    print_formatted('Dropout', 'stage')

    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    solvers = {}
    dropout_choices = [1, 0.25]
    for dropout in dropout_choices:
        if dropout == 1:
            print_formatted('without dropout, p = 1', 'bold', 'blue')
        else:
            print_formatted('with dropout, p = %.2f' % dropout, 'bold', 'blue')

        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[500],
                                  num_classes=10,
                                  dropout=dropout)

        solver = Solver(model,
                        small_data,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        num_epochs=25,
                        batch_size=100,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

        if dropout == 1: print()

    if plot:
        plot_stats('train_acc',
                   'val_acc',
                   solvers={
                       '1.00 dropout': solvers[1],
                       '0.25 dropout': solvers[0.25]
                   },
                   filename='dropout.png')
Example No. 6
    def test(self, env, render=True):
        # Run one episode with the trained model, logging per-step statistics
        obs, done, ep_reward = env.reset(), False, 0
        stats = []
        action = [0]

        while not done:
            # Log the current state together with the action that led to it
            stats.append({'t': env.task.t, 'q': obs[0], 'q_ref': env.task.get_q_ref(), 'a1': obs[1], 'u': action[0]})

            action, _ = self.model.action_value(obs[None, :])
            obs, reward, done = env.step(action[0])
            ep_reward += reward

        if render:
            df = pd.DataFrame(stats)
            plot_stats(df)

        return ep_reward
Example No. 7
def train_best_fc_model(plot=False):
    print_formatted('Best fully connected net', 'stage')

    hidden_dims = [100, 100, 100]
    weight_scale = 2e-2
    num_epochs = 10
    dropout = 1

    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
    }

    print_formatted('training', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale,
                              normalization='batchnorm',
                              dropout=dropout)
    solver = Solver(model,
                    data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=num_epochs,
                    batch_size=50,
                    print_every=100)
    solver.train()
    print()

    if plot: plot_stats('loss', 'train_val_acc', solvers={'best_fc': solver})

    print_formatted('evaluating', 'bold', 'blue')
    # With no labels passed, model.loss returns the class scores only
    y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
    y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
    print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
    print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())
Example No. 8
def train_two_layer(plot=False):
    print_formatted('Two layer net', 'stage')

    model = TwoLayerNet(input_dim=3072, hidden_dim=100, num_classes=10)
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    solver = Solver(model,
                    data,
                    num_epochs=1,
                    print_every=100,
                    batch_size=100,
                    lr_decay=0.95)
    solver.train()

    if plot:
        plot_stats('loss',
                   'train_val_acc',
                   solvers={'two_layer_net': solver},
                   filename='two_layer_net_stats.png')
Example No. 9
def evolve(M, H, plot=False):
    env = simpy.Environment()
    env.persons = []
    env.obs_pop = {'F': [], 'M': []}
    env.new_persons = []
    env.wanting = {'F': [], 'M': []}
    env.borns = []
    env.deaths = []
    env.average_age = []
    env.couples = []

    # Build the initial population: M women and H men with random starting ages
    for i in range(M):
        w = Woman(i)
        env.persons.append(w)
        w.age = rnd.randint(0, 1200)

    j = len(env.persons)

    for i in range(H):
        m = Man(i + j)
        env.persons.append(m)
        m.age = rnd.randint(0, 1200)

    env.idx = M + H

    env.process(live_generator(env))
    env.run(G.max_time)
    # Element-wise sum of the observed female and male population series
    tot_pop = utils.__elem_sum__(env.obs_pop['F'], env.obs_pop['M'])

    if plot:
        # Normalize the accumulated age by the population size at each step
        env.average_age = [
            elem / (tot_pop[i] * 12) for i, elem in enumerate(env.average_age)
        ]
        plotting.plot_stats(env)

    return tot_pop
Example No. 10
def sarsa(env,
          num_episodes,
          discount_factor=1.0,
          alpha=0.5,
          epsilon=0.1,
          max_episode_length=20,
          start_Q=None):
    """
    SARSA algorithm: on-policy TD control, finds the optimal epsilon-greedy policy
    :param env:
    :param num_episodes:
    :param discount_factor:
    :param alpha:
    :param epsilon:
    :return:
    """

    # The (final) action-value function, nested dict
    if start_Q is not None:
        Q = start_Q
    else:
        Q = np.zeros((len(env.q_space), len(env.qe_space), len(env.a1_space),
                      len(env.action_space)))

    # Episode statistics
    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes))

    # Policy-to-follow:
    policy = make_epsilon_greedy_policy(Q, epsilon, len(env.action_space))
    # Run through the episodes
    for i_episode in range(num_episodes):
        if (i_episode + 1) % 100 == 0:
            print("\rEpisode {}/{}".format(i_episode + 1, num_episodes),
                  end="")
            sys.stdout.flush()

        state = env.reset()
        action_probs = policy(state)
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        intermediate_stats = []
        for step in itertools.count():

            t = step * env.dt
            q_ref = 10 * np.pi / 180 * np.sin(t * 2 * np.pi / 5)

            # Perform action:
            next_state, reward, done, _ = env.step(action)

            # Based on results, pick the next action:
            next_action_probs = policy(next_state)
            next_action = np.random.choice(np.arange(len(next_action_probs)),
                                           p=next_action_probs)

            # Update statistics from reward etc
            stats.episode_lengths[i_episode] = t
            stats.episode_rewards[i_episode] += reward

            # TD update:
            td_target = reward + discount_factor * Q[next_state][next_action]
            td_delta = td_target - Q[state][action]
            Q[state][action] += alpha * td_delta
            if (i_episode + 1) % 1000 == 0:
                intermediate_stats.append({
                    't': t,
                    'q_ref': q_ref,
                    'u': env.action_space[action],
                    'q': env.state[0],
                    'a1': env.state[1]
                })

            # Episode cutoff: done flag or maximum episode duration reached
            if done or t >= max_episode_length:
                break
            state = next_state
            action = next_action
        if len(intermediate_stats) > 0:
            df = pd.DataFrame(intermediate_stats)
            plotting.plot_stats(df, env, str(i_episode + 1))

    return Q, stats, intermediate_stats
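sarsa() relies on make_epsilon_greedy_policy, whose definition is not part of this snippet. A minimal sketch of such a factory, assuming Q[state] indexes a vector of action values exactly as in the TD update above:

import numpy as np

# Hypothetical helper (not from the original snippet): returns a function that
# maps a state to epsilon-greedy action probabilities over num_actions actions.
def make_epsilon_greedy_policy(Q, epsilon, num_actions):
    def policy_fn(state):
        probs = np.ones(num_actions) * epsilon / num_actions  # uniform exploration mass
        best_action = np.argmax(Q[state])                     # greedy action w.r.t. Q
        probs[best_action] += 1.0 - epsilon
        return probs
    return policy_fn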
    
Example No. 11
    # set task environments with fixed hidden states within each block
    task = RevLearn(O, S, D, blocks = blocks, T = T)
    task.set_hidden_states()
    
    for j,q in enumerate(['IRI', 'RRI']):
        for k, mean in enumerate(np.arange(10,31)):
            correct_choices, responses, hidden_states = \
                generate_behavior(q, task, mean, d, state_transition_matrix)
            
            performance[i,j,k] = correct_choices.mean(axis = -1)
            choice_prob[i,j,k] = get_choice_probability(correct_choices, 
                                                   hidden_states[:,:,1] == 0)

#plot stats    
fig1 = plot_stats(performance, choice_prob)
fig1.savefig('Fig5.pdf', bbox_inches = 'tight', transparent = True)
fig1.savefig('Fig5.png', bbox_inches = 'tight', transparent = True, dpi = 600)

###############################################################################


####################run simulations for all agents############################# 
labels = np.array(['IRI', 'RRI', 'SU', 'DU'])
performance = np.zeros((2,len(labels),blocks))
choice_prob = np.zeros((2,len(labels),21))

for i,s in enumerate(['irregular', 'semi-regular']):
    mu = 20
    if s == 'irregular':
        sigma = mu*(mu-1)
Example No. 12
#experiment_ids = ['CartPole_none_m0_lr025', 'CartPole_single_m0_lr025', 'CartPole_per-layer_m0_lr025', 'CartPole_per-weight_m0_lr025']

#experiment_ids = ['MNIST_none_no_rt', 'MNIST_single_no_rt', 'MNIST_pl_no_rt', 'MNIST_pw_no_rt']
#experiment_ids = ['CartPole_none_no_rt', 'CartPole_single_no_rt', 'CartPole_per-layer_no_rt', 'CartPole_per-weight_no_rt']

#experiment_ids = ['MNIST_none_mx300', 'MNIST_single_mx300', 'MNIST_pl_mx300', 'MNIST_pw_mx300']
#experiment_ids = ['Seaquest_none_m0_lr025', 'Seaquest_single_m0_lr025', 'Seaquest_pl_m0_lr025', 'Seaquest_pw_m0_lr025']
this_file_dir_local1 = os.path.dirname(os.path.abspath(__file__))
package_root_this_file1 = fs.get_parent(this_file_dir_local1, 'es-rl')
d1 = os.path.join(package_root_this_file1, 'experiments', 'checkpoints')
experiment_ids = os.listdir(d1)

for experiment_id in experiment_ids:
    this_file_dir_local = os.path.dirname(os.path.abspath(__file__))
    package_root_this_file = fs.get_parent(this_file_dir_local, 'es-rl')
    d = os.path.join(package_root_this_file, 'experiments', 'checkpoints',
                     experiment_id)
    directories = [
        os.path.join(d, di) for di in os.listdir(d)
        if os.path.isdir(os.path.join(d, di))
    ]
    directories = [
        d for d in directories if 'monitoring' not in d and 'analysis' not in d
    ]
    # Create result directory
    dst_dir = '/home/lorenzo/MEGA/UNI/MSc/Master Thesis/repo/graphics' + experiment_id + '-analysis'
    result_dir = os.path.join(d, experiment_id + '-analysis')

    for dirs in directories:
        plot.plot_stats(dirs + '/stats.csv', dirs)
Example No. 13
def train(mode: str,
          env_params: dict,
          ac_params: dict,
          rls_params: dict,
          pid_params: dict,
          results_path: str,
          seed=0,
          return_logs=True,
          save_logs=False,
          save_weights=False,
          weight_save_interval: int = 10,
          save_agents=False,
          load_agents=False,
          agents_path="",
          plot_states=True,
          plot_nn_weights=False,
          plot_rls=False):
    """
    Trains the integrated IDHP agent in the 6DOF environment for a single episode.

    :param mode: str indicating what task the agent should perform: train, test_1, or test_2
    :param env_params: dict, relevant parameters for environment setup
    :param ac_params: dict, relevant parameters for actor-critic setup
    :param rls_params: dict, relevant parameters for RLS estimator setup
    :param pid_params: relevant parameters for PID setup
    :param results_path: Save path for the training logs
    :param seed: Random seed for initialization
    :param return_logs: Return the logs as function output?
    :param save_logs: Save the logs to file?
    :param save_weights: Save the weights in the logger? Useful for debugging
    :param weight_save_interval: Number of timesteps between saving the neural network weights in the logger
    :param save_agents: Save the trained agents to file after training?
    :param load_agents: Load pre-trained agents from file before starting the tasks?
    :param agents_path: Save or load path for trained agents.
    :param plot_states: Plot the states?
    :param plot_nn_weights: Plot neural network weights after training? (Warning: takes a while)
    :param plot_rls: Plot the RLS estimator gradients after training?

    :return: Can return various tuples, depending on above settings
    """

    torch.manual_seed(seed)
    np.random.seed(seed)

    # Environment
    env = Helicopter6DOF(dt=env_params['dt'], t_max=env_params['t_max'])
    trim_state, trim_actions = env.trim(
        trim_speed=env_params['initial_velocity'],
        flight_path_angle=env_params['initial_flight_path_angle'],
        altitude=env_params['initial_altitude'])
    observation = trim_state.copy()
    ref = trim_state.copy()
    ref_generator = RefGenerator(T=10,
                                 dt=env_params["dt"],
                                 A=10,
                                 u_ref=0,
                                 t_switch=60,
                                 filter_tau=2.5)

    # Logging
    logger = Logger(params=ac_params)

    # Agents:
    agent_col = DHPAgent(**ac_params['col'])
    agent_lon = DHPAgent(**ac_params['lon'])
    if load_agents:
        agent_col.load(agents_path + "col.pt")
        agent_lon.load(agents_path + "lon.pt")
        with open(agents_path + "rls.pkl", 'rb') as f:
            rls_estimator = pickle.load(f)
    else:
        # incremental RLS estimator
        rls_estimator = RecursiveLeastSquares(**rls_params)
    agents = [agent_col, agent_lon]

    # Create controllers
    lateral_pid = LatPedPID(phi_trim=trim_state[6],
                            lat_trim=trim_actions[2],
                            pedal_trim=trim_actions[3],
                            dt=env_params["dt"],
                            gains_dict=pid_params)
    collective_pid = CollectivePID6DOF(col_trim=trim_actions[0],
                                       h_ref=env_params['initial_altitude'],
                                       dt=env_params['dt'],
                                       proportional_gain=pid_params['Kh'])

    # Excitation signal for the RLS estimator
    excitation = np.load('excitation.npy')

    # Flags
    excitation_phase = not load_agents
    update_col = load_agents
    update_lon = True
    success = True
    rewards = np.zeros(2)

    def update_agent(n):
        """
        Shorthand to update a single numbered agent after a single transition.
        :param n: Index of agent to update, per list 'agents' (0=col, 1=lon)
        """
        rewards[n], dr_ds = agents[n].get_reward(next_observation, ref)
        F, G = agents[n].get_transition_matrices(rls_estimator)
        agents[n].update_networks(observation, next_observation, ref, next_ref,
                                  dr_ds, F, G)

    # Main loop
    for step in itertools.count():
        lateral_cyclic, pedal = lateral_pid(observation)

        # TODO: It would be much nicer if reference generation would be an internal thing in the environment I guess
        if mode == "train":
            if step == 0:
                ref_generator.set_task(task="train_lon",
                                       t=0,
                                       obs=observation,
                                       velocity_filter_target=0)
                ref = ref_generator.get_ref(observation, env.t)
            elif step == 1000:
                excitation_phase = False
            elif step == env_params['step_switch']:
                agent_lon.learning_rate_actor *= 0.1
                agent_lon.learning_rate_critic *= 0.1
                update_col = True
                ref_generator.set_task("train_col",
                                       t=env.t,
                                       obs=observation,
                                       z_start=observation[11])

            # Get ref, action, take action
            if step < env_params['step_switch']:
                actions = np.array([
                    collective_pid(observation),
                    trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                    lateral_cyclic,
                    pedal
                ])
            else:
                actions = np.array([
                    trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                    trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                    lateral_cyclic,
                    pedal
                ])
        elif mode == "test_1":
            if step == 0:
                ref_generator.set_task(task="hover", t=0, obs=observation)
                ref = ref_generator.get_ref(observation, env.t)

            elif step == 500:
                ref_generator.set_task("velocity",
                                       t=env.t,
                                       obs=observation,
                                       z_start=0,
                                       velocity_filter_target=25 - observation[0])

            elif step == 2000:
                ref_generator.set_task("velocity",
                                       t=env.t,
                                       obs=observation,
                                       z_start=0,
                                       velocity_filter_target=0 - observation[0])

            actions = np.array([
                trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                lateral_cyclic, pedal
            ])
        elif mode == "test_2":
            if step == 0:
                ref_generator.set_task(task="descent",
                                       t=0,
                                       t_switch=0,
                                       obs=observation)
                ref = ref_generator.get_ref(observation, env.t)

            elif step == 1000:
                env.set_engine_status(n_engines_available=1, transient=True)
            actions = np.array([
                trim_actions[0] - 0.5 + agent_col.get_action(observation, ref),
                trim_actions[1] - 0.5 + agent_lon.get_action(observation, ref),
                lateral_cyclic, pedal
            ])
        else:
            raise NotImplementedError("Training mode unknown. ")

        if excitation_phase:
            actions += excitation[step]

        actions = np.clip(actions, 0, 1)

        # Take step in the environment
        next_observation, _, done = env.step(actions)
        if env.t < 20:
            ref_generator.A = 10
        elif 20 <= env.t < 40:
            ref_generator.A = 15
        else:
            ref_generator.A = 20
        next_ref = ref_generator.get_ref(observation, env.t)

        # Update RLS estimator,
        rls_estimator.update(observation, actions[:2], next_observation)

        # Collective:
        if update_col:
            update_agent(0)
        else:
            rewards[0] = 0

        # Cyclic
        if update_lon:
            update_agent(1)
        else:
            rewards[1] = 0

        logger.log_states(env.t, observation, ref, actions, rewards,
                          env.P_available, env.P_out)
        if save_weights and (step % weight_save_interval == 0):
            logger.log_weights(env.t, agents, rls_estimator)

        if envelope_limits_reached(observation)[0]:
            print("Save envelope limits reached, stopping simulation. Seed: " +
                  str(seed))
            print("Cause of violation: " +
                  envelope_limits_reached(observation)[1])
            success = False
            done = True

        if np.isnan(actions).any():
            print("NaN encounted in actions at timestep", step, " -- ",
                  actions, "Seed: " + str(seed))
            success = False
            done = True

        if done or (mode == "test_2" and observation[11] > 0):
            break

        # Next step (itertools.count() already advances `step`)
        observation = next_observation
        ref = next_ref

    # print("Training time: ", time.time()-t_start)
    logger.finalize()

    if save_logs:
        if not os.path.exists(results_path):
            os.mkdir(results_path)
        logger.save(path=results_path + "log.pkl")

    if save_agents:
        if not os.path.exists(agents_path):
            os.mkdir(agents_path)
        agent_col.save(path=agents_path + "col.pt")
        agent_lon.save(path=agents_path + "lon.pt")
        rls_estimator.save(path=agents_path + "rls.pkl")

    # Visualization
    if plot_states:
        plot_stats(logger)

    if plot_nn_weights and save_weights:
        plot_neural_network_weights(logger,
                                    figsize=(8, 6),
                                    agent_name='col',
                                    title='Collective')
        plot_neural_network_weights(logger,
                                    figsize=(8, 6),
                                    agent_name='lon',
                                    title='Longitudinal Cyclic')
    elif plot_nn_weights and not save_weights:
        print("Called plot_nn_weights but no weights were saved (save_weights=False), skipping.")

    if plot_rls and save_weights:
        plot_rls_weights(logger)
    elif plot_rls and not save_weights:
        print("Called plot_rls_weights but no weights were saved (save_weights=False), skipping.")

    # Performance score derived from the logged reward column 'r2' over steps 5000-6000
    score = np.sqrt(-logger.state_history.iloc[5000:6000]['r2'].sum() / 1000)

    if return_logs:
        return logger, score
    else:
        if success:
            return 1, score
        else:
            return 0, 0
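For orientation only, a call to train() might look like the sketch below. Every dictionary is a placeholder whose keys mirror what train() reads above (dt, t_max, step_switch, the 'col'/'lon' sub-dicts, the 'Kh' gain); real values from the original project are required before this would actually run:

# Illustrative call sketch; all parameter contents are placeholders, not
# values from the original project.
env_params = {'dt': 0.01, 't_max': 120.0, 'initial_velocity': 20.0,
              'initial_flight_path_angle': 0.0, 'initial_altitude': 100.0,
              'step_switch': 6000}
ac_params = {'col': {}, 'lon': {}}   # DHPAgent keyword arguments per control axis
rls_params = {}                      # RecursiveLeastSquares keyword arguments
pid_params = {'Kh': 0.1}             # PID gains; 'Kh' feeds CollectivePID6DOF

logger, score = train(mode='train',
                      env_params=env_params,
                      ac_params=ac_params,
                      rls_params=rls_params,
                      pid_params=pid_params,
                      results_path='results/',
                      seed=0,
                      return_logs=True,
                      save_logs=False,
                      plot_states=False)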