######## Appending Metrics ########
            train_label_list = conversions.tensor_list_to_int_list(
                train_labels)
            train_pred_list = conversions.tensor_list_to_int_list(train_preds)
            val_label_list = conversions.tensor_list_to_int_list(val_labels)
            val_pred_list = conversions.tensor_list_to_int_list(val_preds)

            train_scores = metrics.precision_recall_fscore_support(
                train_label_list, train_pred_list, average='weighted')
            val_scores = metrics.precision_recall_fscore_support(
                val_label_list, val_pred_list, average='weighted')

            validation_user_statistics_over_epochs.append(
                statistics.generate_training_statistics_for_user(
                    val_labels, val_preds, val_users))

            if val_scores[2] > best_split_score:
                best_split_score = val_scores[2]
                epoch_at_best_score = epoch

        split_val_scores.append(best_split_score)
        best_score_epoch_log.append(epoch_at_best_score)

    print("Avg Cross Val Score: {}".format(list_mean(split_val_scores)))
    print("Students Used for Training: {}".format(student_filter_ids))

# scores_and_epochs = (split_val_scores, epoch_at_best_score)
# scores_and_epochs_file_name = os.path.join(definitions.DATA_DIR, "cross_val_scores/multitask_autoencoder.pkl")
# write_utils.data_structure_to_pickle(scores_and_epochs, scores_and_epochs_file_name)
# Exemplo n.º 2  (extraction artifact from the source aggregator — not valid
# Python; preserved here as a comment so the module parses)
# 0
def search_multitask_auto_encoder(hyper_parameters_list, data: dict):
    """Grid-search hyper-parameter configurations for the multitask autoencoder.

    For each configuration in ``hyper_parameters_list``: trains a freshly
    initialized model on every stratified k-fold split of ``data`` for
    ``n_epochs`` epochs, records the best weighted validation F-score reached
    in each split, averages those best scores across splits, and finally
    pickles the list of ``(avg_val_score, model_params)`` tuples to
    ``grid_search_details.pkl`` in ``definitions.DATA_DIR``.

    Args:
        hyper_parameters_list: Iterable of parameter configurations; each entry
            must be decodable by ``helper.get_params_from_model``.
        data: Raw dataset dict. Must contain a ``'data'`` mapping whose keys
            encode student ids (consumed by
            ``conversions.extract_distinct_student_idsfrom_keys``).

    Side effects:
        Prints progress to stdout and writes the grid-search results pickle.
    """
    # NOTE(review): helper names below contain apparent typos ("k_fod",
    # "idsfrom") — kept as-is; they must match the project-defined helpers.
    splits = cross_val.get_k_fod_cross_val_splits_stratified_by_students(data)
    student_list = conversions.extract_distinct_student_idsfrom_keys(
        data['data'].keys())
    # Deep copy so tensorification cannot mutate the caller's raw `data`;
    # tensors go to GPU when CUDA is available.
    tensorified_data = tensorify.tensorify_data_gru_d(
        copy.deepcopy(data), torch.cuda.is_available())

    # One (avg_val_score, model_params) entry per configuration tried.
    final_scores_for_each_config = []

    print("Label Distribution")
    print(statistics.get_train_test_val_label_counts_from_raw_data(data))

    for model_params_no, model_params in enumerate(hyper_parameters_list):
        print(
            "###################### Param Config No: {} ########################"
            .format(model_params_no))
        print("Params: ", model_params)

        # Decode the flat config tuple; field order is fixed by
        # helper.get_params_from_model.
        (use_histogram, autoencoder_bottle_neck_feature_size,
         autoencoder_num_layers, alpha, beta, decay, num_features,
         num_covariates, shared_hidden_layer_size,
         user_dense_layer_hidden_size, num_classes, learning_rate, n_epochs,
         shared_layer_dropout_prob, user_head_dropout_prob, class_weights,
         device) = helper.get_params_from_model(model_params, data)

        # Best validation score achieved in each split for this config.
        best_val_scores = []

        for split_no, split in enumerate(splits):

            print("Split {}".format(split_no))

            # -1 guarantees the first epoch's (non-negative) F-score wins.
            best_split_score = -1

            # Repoint the shared tensorified dict at this split's ids;
            # test ids are unused during the search.
            tensorified_data['train_ids'] = split["train_ids"]
            tensorified_data['val_ids'] = split["val_ids"]
            tensorified_data['test_ids'] = []

            # Fresh model/criteria/optimizer per split so no state leaks
            # across folds.
            model, reconstruction_criterion, classification_criterion, optimizer = helper.init_multitask_autoencoder_learner(
                num_features, autoencoder_bottle_neck_feature_size,
                autoencoder_num_layers, shared_hidden_layer_size,
                user_dense_layer_hidden_size, num_classes, num_covariates,
                shared_layer_dropout_prob, user_head_dropout_prob,
                learning_rate, decay, class_weights, student_list)

            # Per-epoch stat dicts with 'train_loss'/'val_loss'-style keys
            # (shape defined by plotting helper); deep-copied so the three
            # loss trackers don't share lists.
            total_loss_over_epochs, scores_over_epochs = plotting.get_empty_stat_over_n_epoch_dictionaries(
            )
            reconstruction_loss_over_epochs = copy.deepcopy(
                total_loss_over_epochs)
            classification_loss_over_epochs = copy.deepcopy(
                total_loss_over_epochs)

            for epoch in tqdm.tqdm(range(n_epochs)):

                # One optimization pass; returns matched train/val tuples of
                # (total, reconstruction, classification) losses plus labels,
                # predictions and user ids.
                (train_total_loss, train_total_reconstruction_loss,
                 train_total_classification_loss, train_labels, train_preds,
                 train_users), (val_total_loss, val_total_reconstruction_loss,
                                val_total_classification_loss, val_labels,
                                val_preds,
                                val_users) = helper.train_for_one_epoch(
                                    tensorified_data, num_classes, model,
                                    reconstruction_criterion,
                                    classification_criterion, device,
                                    optimizer, alpha, beta, use_histogram)

                ######## Appending losses ########
                total_loss_over_epochs['train_loss'].append(train_total_loss)
                total_loss_over_epochs['val_loss'].append(val_total_loss)

                reconstruction_loss_over_epochs['train_loss'].append(
                    train_total_reconstruction_loss)
                reconstruction_loss_over_epochs['val_loss'].append(
                    val_total_reconstruction_loss)

                classification_loss_over_epochs['train_loss'].append(
                    train_total_classification_loss)
                classification_loss_over_epochs['val_loss'].append(
                    val_total_classification_loss)

                ######## Appending Metrics ########
                # Convert tensor outputs to plain ints for sklearn metrics.
                train_label_list = conversions.tensor_list_to_int_list(
                    train_labels)
                train_pred_list = conversions.tensor_list_to_int_list(
                    train_preds)
                val_label_list = conversions.tensor_list_to_int_list(
                    val_labels)
                val_pred_list = conversions.tensor_list_to_int_list(val_preds)

                # F_SCORE_INDEX (defined elsewhere in this module) selects the
                # F-score element of precision_recall_fscore_support's return
                # tuple — presumably index 2; verify against its definition.
                train_scores = metrics.precision_recall_fscore_support(
                    train_label_list, train_pred_list,
                    average='weighted')[F_SCORE_INDEX]
                val_scores = metrics.precision_recall_fscore_support(
                    val_label_list, val_pred_list,
                    average='weighted')[F_SCORE_INDEX]

                # Accumulated for later plotting/inspection; not consumed
                # within this function.
                scores_over_epochs['train_scores'].append(train_scores)
                scores_over_epochs['val_scores'].append(val_scores)

                if val_scores > best_split_score:
                    best_split_score = val_scores

            best_val_scores.append(best_split_score)

        # Cross-validation score for this config = mean of per-split bests.
        avg_val_score = list_mean(best_val_scores)
        final_scores_for_each_config.append((avg_val_score, model_params))

        print("Average score for current configuration: {}".format(
            avg_val_score))

    # Persist all (score, params) pairs so the best config can be recovered
    # offline.
    grid_search_details_file_path = os.path.join(definitions.DATA_DIR,
                                                 "grid_search_details.pkl")
    write_utils.data_structure_to_pickle(final_scores_for_each_config,
                                         grid_search_details_file_path)