def run_model(data_set, kmer_size, norm_input, encoding_dim_1, encoding_dim_2,
              encoded_activation, input_dropout_pct, dropout_pct, num_epochs,
              batch_size, n_splits, n_repeats, compute_informative_features,
              plot_iteration, graph_dir, outFile):
    """Train and evaluate a 2-layer supervised model on every fold of a data set.

    The data set is loaded once, then a fresh model is built, fitted and
    evaluated for each train/test split in ``rskf``. Per-split statistics are
    collected in a dictionary keyed by split index and finally handed to
    ``stats_utils.aggregate_statistics_across_folds`` for printing/plotting.

    Args:
        data_set: Name of the data set passed to
            ``load_kmer_cnts_jf.load_single_disease``.
        kmer_size: k-mer size passed to the loader.
        norm_input: If truthy, each split is passed through
            ``stats_utils.standardize_data`` before training.
        encoding_dim_1, encoding_dim_2, encoded_activation,
        input_dropout_pct, dropout_pct: Forwarded unchanged to
            ``deep_learning_models.create_supervised_model_2layers``.
        num_epochs: Number of training epochs per split.
        batch_size: Mini-batch size used for ``model.fit``. A falsy value
            (``None`` or ``0``) trains with a single batch containing the
            whole training split.
        n_splits, n_repeats: Forwarded to the loader; ``n_splits`` is also
            forwarded to the aggregation step.
        compute_informative_features: If truthy, SHAP values are computed for
            every split and aggregated at the end.
        plot_iteration: Only used when formatting the run summary strings.
        graph_dir: Currently unused by this function.
        outFile: Forwarded to
            ``stats_utils.aggregate_statistics_across_folds``.

    Returns:
        None. Results are reported through the ``stats_utils`` helpers.
    """
    # Format strings for outputting the parameters associated with this run.
    summary_string, plotting_string = stats_utils.format_input_parameters_printing_2layers(
        data_set, kmer_size, norm_input, encoding_dim_1, encoding_dim_2,
        encoded_activation, input_dropout_pct, dropout_pct, num_epochs,
        batch_size, n_splits, n_repeats, compute_informative_features,
        plot_iteration)

    outFile_header = 'data_set\tkmer_size\tnorm_input\tencoding_dim_1\tencoding_dim_2\tencoded_activation\tinput_dropout_pct\tdropout_pct\tnum_epochs\tbatch_size\tn_splits\tn_repeats\t'

    #################
    # Load the data #
    #################
    print('Loading data...')

    # rskf = repeated stratified k fold. rskf[0][i] holds the train indices
    # and rskf[1][i] the test indices of the i-th kfold-by-iteration combo.
    data_normalized, labels, rskf = load_kmer_cnts_jf.load_single_disease(
        data_set, kmer_size, n_splits, n_repeats, precomputed_kfolds=False)

    # Number of input kmers; identical for every split, so computed once.
    input_dim = len(data_normalized[0])

    ###################################################
    # Iterate through the data kfolds and iterations  #
    ###################################################

    # key = split index, value = dictionary with the stats of that split
    aggregated_statistics = {}

    for n_repeat, train_idx in enumerate(rskf[0]):

        print('Iteration %s...' % n_repeat)

        aggregated_statistics[n_repeat] = {}

        test_idx = rskf[1][n_repeat]
        x_train, y_train = data_normalized[train_idx], labels[train_idx]
        x_test, y_test = data_normalized[test_idx], labels[test_idx]

        # Standardize the data, mean=0, std=1.
        if norm_input:
            x_train, x_test = stats_utils.standardize_data(x_train, x_test)

        ###########################################
        # Set up a model (supervised learning)    #
        ###########################################
        # The model has to be instantiated anew for every split, otherwise
        # the weights would not start from scratch.
        model = deep_learning_models.create_supervised_model_2layers(
            input_dim, encoding_dim_1, encoding_dim_2, encoded_activation,
            input_dropout_pct, dropout_pct)

        ##################################################
        # Fit the model with the train data of this fold #
        ##################################################
        # history.history is a dictionary of per-epoch metrics.
        history = History()

        # Honour the requested batch size so that training matches the
        # batch_size reported in the summary/output header. Previously the
        # argument was ignored and the whole split was always one batch.
        fit_batch_size = batch_size if batch_size else len(x_train)

        model.fit(x_train,
                  y_train,
                  epochs=num_epochs,
                  batch_size=fit_batch_size,
                  shuffle=True,
                  validation_data=(x_test, y_test),
                  verbose=0,
                  callbacks=[history])

        # Predict using the held-out data.
        y_pred = model.predict(x_test)

        # TODO: save the weights of this model.

        ################################################################
        # Compute summary statistics                                   #
        ################################################################
        # Store the results of this split in aggregated_statistics.
        aggregated_statistics = stats_utils.compute_summary_statistics(
            y_test, y_pred, history, aggregated_statistics, n_repeat)

        # Per-split plots (roc, accuracy/loss vs epoch, confusion matrix,
        # precision recall) are deliberately skipped: too many graphs.
        if compute_informative_features:
            shap_values, shap_values_summed = stats_utils.compute_shap_values_deeplearning(
                input_dim, model, x_test)
            aggregated_statistics[n_repeat][
                'shap_values_summed'] = shap_values_summed
            aggregated_statistics[n_repeat]['shap_values'] = shap_values

    ##############################################
    # Aggregate the results from all the k-folds #
    # Print and Plot                             #
    ##############################################
    print('Aggregating statistics across iterations and printing/plotting...')

    stats_utils.aggregate_statistics_across_folds(aggregated_statistics, rskf,
                                                  n_splits, outFile,
                                                  summary_string,
                                                  plotting_string,
                                                  outFile_header)

    ###################
    # Aggregate shap: #
    ###################

    if compute_informative_features:
        print('Computing informative features with Shap...')
        stats_utils.aggregate_shap(aggregated_statistics, rskf)
# Bootstrap the Qin_et_al k-mer counts for several k-mer sizes and pickle
# each result into the intermediate directory.
tmp_intermediate_directory = config_file.tmp_intermediate_directory

# Settings that do not change between k-mer sizes.
data_set = 'Qin_et_al'
num_replicates = 100
num_kmers = 100000

# NOTE(review): n_splits and n_repeats are not defined in the visible part of
# this file; presumably they are module-level names set elsewhere. TODO confirm.
for kmer_size in [5, 6, 7, 8, 10]:

    print(kmer_size)
    #################
    # Load the data #
    #################
    print('Loading data...')

    # With bootstrap=True the loader also returns the raw k-mer counts.
    data_normalized, kmer_cnts, labels, rskf = load_kmer_cnts_jf.load_single_disease(
        data_set,
        kmer_size,
        n_splits,
        n_repeats,
        precomputed_kfolds=False,
        bootstrap=True)

    bootstrapped_data = stats_utils.bootstrap_data(data_normalized, kmer_cnts,
                                                   num_replicates, num_kmers)

    # Use a context manager so the pickle file is flushed and closed
    # deterministically instead of relying on garbage collection.
    bootstrap_path = "%skmer_size_%s_Qin_bootstrap.p" % (
        tmp_intermediate_directory, kmer_size)
    with open(bootstrap_path, "wb") as bootstrap_file:
        pickle.dump(bootstrapped_data, bootstrap_file)