Example No. 1
def generate_BBB_controller(domain_name, task_identity, window_size,
                            number_demonstrations, epochs, number_mini_batches,
                            activation_unit, learning_rate, hidden_units,
                            number_samples_variance_reduction, precision_alpha,
                            weights_prior_mean_1, weights_prior_mean_2,
                            weights_prior_deviation_1,
                            weights_prior_deviation_2, mixture_pie, rho_mean,
                            extra_likelihood_emphasis):

    configuration_identity = LOGS_DIRECTORY + domain_name + '/' + str(
        number_demonstrations) + '/bbb_controller/window_size_' + str(
            window_size) + '/' + str(task_identity) + '/'
    training_logs_configuration_identity = configuration_identity + 'training/'
    if not os.path.exists(training_logs_configuration_identity):
        os.makedirs(training_logs_configuration_identity)

    directory_to_save_tensorboard_data = training_logs_configuration_identity + TENSORBOARD_DIRECTORY
    saved_models_during_iterations_bbb = training_logs_configuration_identity + SAVED_MODELS_DURING_ITERATIONS_DIRECTORY
    saved_final_model_bbb = training_logs_configuration_identity + SAVED_FINAL_MODEL_DIRECTORY
    #if not os.path.exists(INPUT_MANIPULATION_DIRECTORY):
    #    os.makedirs(INPUT_MANIPULATION_DIRECTORY)
    if not os.path.exists(directory_to_save_tensorboard_data):
        os.makedirs(directory_to_save_tensorboard_data)
    if not os.path.exists(saved_models_during_iterations_bbb):
        os.makedirs(saved_models_during_iterations_bbb)
    if not os.path.exists(saved_final_model_bbb):
        os.makedirs(saved_final_model_bbb)

    start_time = datetime.now()
    moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = getDemonstrationsFromTask(
        domain_name=domain_name,
        task_identity=task_identity,
        window_size=window_size,
        number_demonstrations=number_demonstrations)

    # House-keeping to make data amenable for good training
    mean_x, deviation_x = get_mean_and_deviation(data=moving_windows_x)
    moving_windows_x = NORMALIZE(moving_windows_x, mean_x, deviation_x)

    mean_y, deviation_y = get_mean_and_deviation(data=moving_windows_y)
    moving_windows_y = NORMALIZE(moving_windows_y, mean_y, deviation_y)

    file_name_to_save_input_manipulation_data = training_logs_configuration_identity + 'training_meta_data.pkl'
    normalization_data_to_store = {
        MEAN_KEY_X: mean_x,
        DEVIATION_KEY_X: deviation_x,
        MEAN_KEY_Y: mean_y,
        DEVIATION_KEY_Y: deviation_y,
        DRIFT_PER_TIME_STEP_KEY: drift_per_time_step,
        MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
        WINDOW_SIZE_KEY: window_size
    }
    with open(file_name_to_save_input_manipulation_data, 'wb') as f:
        pickle.dump(normalization_data_to_store, f)

    print(GREEN('Creating the BBB based Bayesian NN'))
    BBB_Regressor = BBBNNRegression(
        number_mini_batches=number_mini_batches,
        number_features=moving_windows_x.shape[1],
        number_output_units=moving_windows_y.shape[1],
        activation_unit=activation_unit,
        learning_rate=learning_rate,
        hidden_units=hidden_units,
        number_samples_variance_reduction=number_samples_variance_reduction,
        precision_alpha=precision_alpha,
        weights_prior_mean_1=weights_prior_mean_1,
        weights_prior_mean_2=weights_prior_mean_2,
        weights_prior_deviation_1=weights_prior_deviation_1,
        weights_prior_deviation_2=weights_prior_deviation_2,
        mixture_pie=mixture_pie,
        rho_mean=rho_mean,
        extra_likelihood_emphasis=extra_likelihood_emphasis)
    print(GREEN('BBB based Bayesian NN created successfully'))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(directory_to_save_tensorboard_data,
                                       sess.graph)
        saver = tf.train.Saver(max_to_keep=3, keep_checkpoint_every_n_hours=2)
        previous_minimum_loss = sys.float_info.max
        mini_batch_size = int(moving_windows_x.shape[0] / number_mini_batches)
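        # Training loop: shuffle the windows each epoch, take one gradient step
        # per mini-batch, checkpoint whenever the mini-batch MSE improves on
        # the best value seen so far, and stream summaries to TensorBoard.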
        for epoch_iterator in tqdm(range(epochs)):
            moving_windows_x, moving_windows_y = randomize(
                moving_windows_x, moving_windows_y)
            ptr = 0
            for mini_batch_iterator in range(number_mini_batches):
                x_batch = moving_windows_x[ptr:ptr + mini_batch_size, :]
                y_batch = moving_windows_y[ptr:ptr + mini_batch_size, :]
                _, loss, summary = sess.run(
                    [
                        BBB_Regressor.train(),
                        BBB_Regressor.getMeanSquaredError(),
                        BBB_Regressor.summarize()
                    ],
                    feed_dict={
                        BBB_Regressor.X_input: x_batch,
                        BBB_Regressor.Y_input: y_batch
                    })
                sess.run(BBB_Regressor.update_mini_batch_index())
                if loss < previous_minimum_loss:
                    saver.save(sess,
                               saved_models_during_iterations_bbb +
                               'iteration',
                               global_step=epoch_iterator,
                               write_meta_graph=False)
                    previous_minimum_loss = loss
                ptr += mini_batch_size
                writer.add_summary(summary,
                                   global_step=tf.train.global_step(
                                       sess, BBB_Regressor.global_step))
            #if epoch_iterator % 2500 == 0:
            #   print(RED('Training progress: ' + str(epoch_iterator) + '/' + str(epochs)))
        writer.close()
        saver.save(sess, saved_final_model_bbb + 'final', write_state=False)
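
Both controllers on this page standardize the inputs and targets with get_mean_and_deviation and NORMALIZE before training. Those helpers are not shown here; a minimal sketch of what they plausibly do (per-feature whitening, with the zero-variance guard being an assumption) is:

import numpy as np

def get_mean_and_deviation(data):
    # Per-feature mean and standard deviation over the sample axis.
    mean = np.mean(data, axis=0)
    deviation = np.std(data, axis=0)
    # Assumption: guard constant features against division by zero.
    deviation[deviation == 0.0] = 1.0
    return mean, deviation

def NORMALIZE(data, mean, deviation):
    # Whiten the data with the statistics computed above.
    return (data - mean) / deviation
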
Example No. 2
def data_efficient_imitation_across_multiple_tasks(
        domain_name, window_size, number_demonstrations,
        adapt_detector_threshold, start_monitoring_at_time_step, detector_c,
        detector_m, initial_detector_threshold, epochs, number_mini_batches,
        activation_unit, learning_rate, hidden_units,
        number_samples_variance_reduction, precision_alpha,
        weights_prior_mean_1, weights_prior_mean_2, weights_prior_deviation_1,
        weights_prior_deviation_2, mixture_pie, rho_mean,
        extra_likelihood_emphasis):
    COPY_OF_ALL_MUJOCO_TASK_IDENTITIES = copy.deepcopy(ALL_MUJOCO_TASK_IDENTITIES)
    for simulation_iterator in range(TOTAL_SIMULATION_ITERATIONS):
        random.seed(simulation_iterator)
        random.shuffle(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES)
        
        ###### Naive Controller ######
        '''
        all_gathered_x, all_gathered_y = None, None
        tasks_trained_on, tasks_encountered = [], []
        
        print(GREEN('Starting runs for the naive controller'))
        for task_iterator, current_task_identity in enumerate(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES):
            print(RED('Simulation iteration is ' + str(simulation_iterator) + ' and task iterator is ' + str(task_iterator)))

            tasks_trained_on.append(current_task_identity)
            tasks_encountered.append(current_task_identity)
            moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = getDemonstrationsFromTask(domain_name=domain_name, task_identity=current_task_identity, window_size=window_size, number_demonstrations=number_demonstrations)
            
            if all_gathered_x is None:
                all_gathered_x, all_gathered_y = copy.deepcopy(moving_windows_x), copy.deepcopy(moving_windows_y)
            else:
                all_gathered_x, all_gathered_y = np.append(all_gathered_x, moving_windows_x, axis=0), np.append(all_gathered_y, moving_windows_y, axis=0)

            disposible_training_x, disposible_training_y = copy.deepcopy(all_gathered_x), copy.deepcopy(all_gathered_y)
            mean_x, deviation_x = get_mean_and_deviation(data = disposible_training_x)
            disposible_training_x = NORMALIZE(disposible_training_x, mean_x, deviation_x)
            mean_y, deviation_y = get_mean_and_deviation(data = disposible_training_y)
            disposible_training_y = NORMALIZE(disposible_training_y, mean_y, deviation_y)

            configuration_identity = 'logs/' + domain_name + '/naive_controller/' + str(simulation_iterator) + '/' + str(task_iterator) + '/'
            training_logs_directory = configuration_identity + 'training/'
            if not os.path.exists(training_logs_directory):
                os.makedirs(training_logs_directory)

            file_name_to_save_meta_data = training_logs_directory + 'training_meta_data.pkl'
            meta_data_to_store = {MEAN_KEY_X: mean_x, DEVIATION_KEY_X: deviation_x, MEAN_KEY_Y:mean_y, DEVIATION_KEY_Y:deviation_y,
                                  DRIFT_PER_TIME_STEP_KEY: drift_per_time_step, MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
                                  TASKS_TRAINED_ON_KEY: tasks_trained_on, TASKS_ENCOUNTERED_KEY: tasks_encountered,
                                  WINDOW_SIZE_KEY: window_size}
            with open(file_name_to_save_meta_data, 'wb') as f:
                pickle.dump(meta_data_to_store, f)

            print(BLUE('Training phase'))
            train_BBB(data_x=copy.deepcopy(disposible_training_x), data_y=copy.deepcopy(disposible_training_y), configuration_identity=configuration_identity, epochs=epochs, number_mini_batches=number_mini_batches, activation_unit=activation_unit,
             learning_rate=learning_rate, hidden_units=hidden_units, number_samples_variance_reduction=number_samples_variance_reduction, precision_alpha=precision_alpha, weights_prior_mean_1=weights_prior_mean_1,
              weights_prior_mean_2=weights_prior_mean_2, weights_prior_deviation_1=weights_prior_deviation_1, weights_prior_deviation_2=weights_prior_deviation_2, mixture_pie=mixture_pie, rho_mean=rho_mean, extra_likelihood_emphasis=extra_likelihood_emphasis)

            print(BLUE('Validation phase'))
            validate_BBB(domain_name=domain_name, task_identity=current_task_identity, configuration_identity=configuration_identity)
        
        '''
        ###### BBB Controller ######
        did_succeed = False
        all_gathered_x, all_gathered_y = None, None
        tasks_trained_on, tasks_encountered, task_iterator_trained_on = [], [], []
        current_task_identity = COPY_OF_ALL_MUJOCO_TASK_IDENTITIES[0]
        detector = Detector(domain_name=domain_name, start_monitoring_at_time_step=start_monitoring_at_time_step, initial_threshold=initial_detector_threshold, detector_m=detector_m, detector_c=detector_c)

        print(GREEN('Starting runs for the BBB controller'))
        for task_iterator in range(len(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES)):
            print(RED('Simulation iteration is ' + str(simulation_iterator) + ', task iterator is ' + str(task_iterator) + ', and current task is ' + str(current_task_identity)))
            tasks_encountered.append(current_task_identity)
            detector.reset()

            configuration_identity = 'logs/' + domain_name + '/bbb_controller/detector_c_' + str(detector_c) + '_detector_m_' + str(detector_m) + '/' + str(simulation_iterator) + '/' + str(task_iterator) + '/'
            training_logs_directory = configuration_identity + 'training/'
            if not os.path.exists(training_logs_directory):
                os.makedirs(training_logs_directory)

            # Gather new demonstrations and retrain only when the detector
            # judged the previous controller unsuccessful on this task; the
            # validation phase below runs regardless.
            if not did_succeed:
                tasks_trained_on.append(current_task_identity)
                task_iterator_trained_on.append(task_iterator)

                moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = getDemonstrationsFromTask(domain_name=domain_name, task_identity=current_task_identity, window_size=window_size, number_demonstrations=number_demonstrations)              
                if all_gathered_x is None:
                    all_gathered_x, all_gathered_y = copy.deepcopy(moving_windows_x), copy.deepcopy(moving_windows_y)
                else:
                    all_gathered_x, all_gathered_y = np.append(all_gathered_x, moving_windows_x, axis=0), np.append(all_gathered_y, moving_windows_y, axis=0)
                disposible_training_x, disposible_training_y = copy.deepcopy(all_gathered_x), copy.deepcopy(all_gathered_y)
                mean_x, deviation_x = get_mean_and_deviation(data = disposible_training_x)
                disposible_training_x = NORMALIZE(disposible_training_x, mean_x, deviation_x)
                mean_y, deviation_y = get_mean_and_deviation(data = disposible_training_y)
                disposible_training_y = NORMALIZE(disposible_training_y, mean_y, deviation_y)

                file_name_to_save_meta_data = training_logs_directory + 'training_meta_data.pkl'
                meta_data_to_store = {
                    MEAN_KEY_X: mean_x,
                    DEVIATION_KEY_X: deviation_x,
                    MEAN_KEY_Y: mean_y,
                    DEVIATION_KEY_Y: deviation_y,
                    DRIFT_PER_TIME_STEP_KEY: drift_per_time_step,
                    MOVING_WINDOWS_X_SIZE_KEY: moving_windows_x_size,
                    TASKS_TRAINED_ON_KEY: tasks_trained_on,
                    TASKS_ENCOUNTERED_KEY: tasks_encountered,
                    WINDOW_SIZE_KEY: window_size
                }
                with open(file_name_to_save_meta_data, 'wb') as f:
                    pickle.dump(meta_data_to_store, f)

                print(BLUE('Training phase'))
                train_BBB(
                    data_x=copy.deepcopy(disposible_training_x),
                    data_y=copy.deepcopy(disposible_training_y),
                    configuration_identity=configuration_identity,
                    epochs=epochs, number_mini_batches=number_mini_batches,
                    activation_unit=activation_unit,
                    learning_rate=learning_rate, hidden_units=hidden_units,
                    number_samples_variance_reduction=number_samples_variance_reduction,
                    precision_alpha=precision_alpha,
                    weights_prior_mean_1=weights_prior_mean_1,
                    weights_prior_mean_2=weights_prior_mean_2,
                    weights_prior_deviation_1=weights_prior_deviation_1,
                    weights_prior_deviation_2=weights_prior_deviation_2,
                    mixture_pie=mixture_pie, rho_mean=rho_mean,
                    extra_likelihood_emphasis=extra_likelihood_emphasis)
                
                _, average_uncertainty = run_on_itself(domain_name=domain_name, task_identity=current_task_identity, configuration_identity=configuration_identity)
                #### Ground the threshold according to the quantitative value of uncertainty on the current task ####
                if adapt_detector_threshold:
                    detector.threshold = average_uncertainty
                
                meta_data_file_for_this_run = 'logs/' + domain_name + '/bbb_controller/detector_c_' + str(detector_c) + '_detector_m_' + str(detector_m) + '/' + str(simulation_iterator) + '/meta_data.pkl'
                meta_data_for_this_run = {TRAINING_TASK_ITERATION_KEY: task_iterator_trained_on}
                with open(meta_data_file_for_this_run, 'wb') as f:
                    pickle.dump(meta_data_for_this_run, f)
                #need_training = False

            print(BLUE('Validation phase'))
            validate_BBB(domain_name=domain_name, task_identity=current_task_identity, configuration_identity=configuration_identity)

            if task_iterator == (len(COPY_OF_ALL_MUJOCO_TASK_IDENTITIES) - 1):
                break

            current_task_identity = COPY_OF_ALL_MUJOCO_TASK_IDENTITIES[task_iterator + 1]
            tasks_encountered.append(current_task_identity)
            did_succeed, average_uncertainty = run_on_itself(domain_name=domain_name, task_identity=current_task_identity, configuration_identity=configuration_identity, detector=detector)
            did_succeed = str_to_bool(did_succeed)
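
In the loop above, run_on_itself evidently reports success as a string, which is converted with str_to_bool before the next iteration. A minimal sketch of such a helper, assuming the usual spellings of truth values, is:

def str_to_bool(value):
    # Assumption: accept booleans directly as well as common string spellings.
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('true', '1', 'yes')
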
def generate_GP_controller(domain_name, task_identity, window_size,
                           number_demonstrations):
    #if not os.path.exists(LOGS_DIRECTORY):
    #    os.makedirs(LOGS_DIRECTORY)

    GP_LOGS_DIRECTORY = LOGS_DIRECTORY + domain_name + '/' + str(
        number_demonstrations) + '/GP_controller/window_size_' + str(
            window_size) + '/'
    if not os.path.exists(GP_LOGS_DIRECTORY):
        os.makedirs(GP_LOGS_DIRECTORY)
    file_to_save_gp_fit_logs = GP_LOGS_DIRECTORY + str(
        task_identity) + '_fit.pkl'
    gp_fit_logs = {}

    start_time = datetime.now()
    moving_windows_x, moving_windows_y, drift_per_time_step, moving_windows_x_size = getDemonstrationsFromTask(
        domain_name=domain_name,
        task_identity=task_identity,
        window_size=window_size,
        number_demonstrations=number_demonstrations)
    print(
        RED('Time taken to generate dataset is ' +
            str(datetime.now() - start_time)))
    '''
    print(GREEN('Heuristic values of the parameters'))
    kernel_variance = np.var(moving_windows_y)
    kernel_lengthscales = np.median(cdist(moving_windows_x, moving_windows_x, 'sqeuclidean').flatten())
    print(BLUE('Kernel Variance is ' + str(kernel_variance)))
    print(BLUE('Kernel lengthscales is ' + str(kernel_lengthscales)))
    print(BLUE('Likelihood variance is 1%-10% of ' + str(kernel_variance)))
    '''

    mean_x, deviation_x = get_mean_and_deviation(data=moving_windows_x)
    moving_windows_x = NORMALIZE(moving_windows_x, mean_x, deviation_x)

    mean_y, deviation_y = get_mean_and_deviation(data=moving_windows_y)
    moving_windows_y = NORMALIZE(moving_windows_y, mean_y, deviation_y)

    k = gpflow.kernels.RBF(moving_windows_x.shape[1],
                           lengthscales=0.01 *
                           np.std(moving_windows_x, axis=0))

    moving_windows_x = np.float64(moving_windows_x)
    moving_windows_y = np.float64(moving_windows_y)

    m = gpflow.models.GPR(moving_windows_x, moving_windows_y, k)
    m.likelihood.variance = 1e-4

    # Evaluate the GP fit on the training windows before hyperparameter
    # optimization, as a baseline for the optimized fit logged below.
    mean_control, var_control = m.predict_y(moving_windows_x)
    mean_squared_predictive_error = np.mean(
        np.square(mean_control - moving_windows_y))
    average_dev_control = np.mean(np.sqrt(var_control))
    gp_fit_logs[UNOPTIMIZED_GP_FIT_KEY] = {
        MEAN_GP_FIT_PREDICTIVE_ERROR_KEY: mean_squared_predictive_error,
        MEAN_GP_FIT_DEVIATION_KEY: average_dev_control
    }
    gp_fit_logs[UNOPTIMIZED_GP_TRAINABLES_KEY] = m.as_pandas_table()

    start_time = datetime.now()
    gpflow.train.ScipyOptimizer().minimize(m)
    print(
        RED('Time taken to optimize the parameters is ' +
            str(datetime.now() - start_time)))

    mean_control, var_control = m.predict_y(moving_windows_x)
    mean_squared_predictive_error = np.mean(
        np.square(mean_control - moving_windows_y))
    average_dev_control = np.mean(np.sqrt(var_control))
    gp_fit_logs[OPTIMIZED_GP_FIT_KEY] = {
        MEAN_GP_FIT_PREDICTIVE_ERROR_KEY: mean_squared_predictive_error,
        MEAN_GP_FIT_DEVIATION_KEY: average_dev_control
    }
    gp_fit_logs[OPTIMIZED_GP_TRAINABLES_KEY] = m.as_pandas_table()

    with open(file_to_save_gp_fit_logs, 'wb') as f:
        pickle.dump(gp_fit_logs, f, protocol=-1)

    #plot(m)
    #print(m.read_trainables())
    #print(m.as_pandas_table())

    #print(m.kern.lengthscales.read_value())

    start_time = datetime.now()
    print(GREEN('Started Validation'))
    logs_for_all_tasks = validate_GP_controller(
        domain_name=domain_name,
        task_identity=task_identity,
        window_size=window_size,
        drift_per_time_step=drift_per_time_step,
        moving_windows_x_size=moving_windows_x_size,
        behavior_controller=m,
        mean_x=mean_x,
        deviation_x=deviation_x,
        mean_y=mean_y,
        deviation_y=deviation_y)
    print(
        RED('Time taken for the validation step is ' +
            str(datetime.now() - start_time)))

    file_to_save_logs = GP_LOGS_DIRECTORY + str(
        task_identity) + '_validation.pkl'
    with open(file_to_save_logs, 'wb') as f:
        pickle.dump(logs_for_all_tasks, f, protocol=-1)
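
For context, here is a minimal sketch of how these entry points might be driven; the domain name, task identity, and hyperparameter values are placeholders chosen only for illustration, not values taken from this page:

# Hypothetical invocation; all argument values below are illustrative
# assumptions, including the type of hidden_units.
generate_GP_controller(domain_name='HalfCheetah',
                       task_identity=0,
                       window_size=2,
                       number_demonstrations=5)

generate_BBB_controller(domain_name='HalfCheetah',
                        task_identity=0,
                        window_size=2,
                        number_demonstrations=5,
                        epochs=1000,
                        number_mini_batches=10,
                        activation_unit='relu',
                        learning_rate=1e-3,
                        hidden_units=64,
                        number_samples_variance_reduction=5,
                        precision_alpha=0.01,
                        weights_prior_mean_1=0.0,
                        weights_prior_mean_2=0.0,
                        weights_prior_deviation_1=0.5,
                        weights_prior_deviation_2=1.5,
                        mixture_pie=0.5,
                        rho_mean=-3.0,
                        extra_likelihood_emphasis=1.0)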