def train_shuffle():
    """Run k-fold training once per precomputed shuffle-index file (0-3).

    For each of the four shuffle-index files, preprocess dataset 38 with
    that file and run the k-fold gene-based model, tagging the run with
    the matching index so results can be told apart.

    NOTE(review): relies on module-level ``epochs``, ``data_preprocess``
    and ``model_gene_based`` defined elsewhere in this file.
    """
    # The original repeated the same two statements four times, varying
    # only the index — a loop expresses that directly.
    for idx in range(4):
        df_train, labels = data_preprocess.process(
            38, shuffle_index=True, index_file=idx)
        model_gene_based.run_model_kfold(df_train, labels, epochs, index=idx)
示例#2
0
def train():
    """Preprocess the gene dataset and run the k-fold gene-based model.

    Uses dataset 38 with ``gene_dataset=True`` (the GENE rather than SNP
    representation) and trains via k-fold cross-validation for the
    module-level ``epochs``.

    NOTE(review): earlier revisions invoked alternative pipelines here
    (ML_methods, wide_n_deep, Bayesian-optimisation variants, run_all);
    that commented-out scaffolding was removed for readability — recover
    it from version control if needed.
    """
    # gene_dataset=True selects the GENE (vs SNP) dataset variant.
    df_train, labels = data_preprocess.process(38, gene_dataset=True)
    model_gene_based.run_model_kfold(df_train, labels, epochs)
示例#3
0
    # Drop samples whose label is neither 0.0 nor 1.0 (e.g. NaN / missing).
    # Iterating backwards keeps remaining indices valid while deleting, and
    # X is deleted in lockstep so features stay aligned with labels.
    for i in range(len(y) - 1, -1, -1):
        if y[i][0] != 0.0 and y[i][0] != 1.0:
            del y[i]
            del X[i]

    # One-hot encode the binary labels (shape becomes (n_samples, 2)).
    y = to_categorical(y)

    # Zero-pad each sample up to the next multiple of FrameSize, then fold
    # it into a (FrameSize, len // FrameSize) 2-D frame for the model.
    for i in range(0, len(X)):
        if len(X[i]) < ((len(X[i]) // FrameSize + 1) * FrameSize):
            for j in range(
                    0,
                (((len(X[i]) // FrameSize + 1) * FrameSize) - len(X[i]))):
                X[i].append(0)
        X[i] = np.reshape(X[i], (FrameSize, len(X[i]) // FrameSize))

    X = np.array(X)
    y = np.array(y)

    # Hold out 10% of the data for testing; split is random per run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    # Train/evaluate both architectures on the same split.
    model_CNN256_LSTM128_64_2(FrameSize, X, X_train, X_test, y_train, y_test,
                              epoch)
    # model_256_128_64_2BS(FrameSize, X, X_train, X_test, y_train, y_test, epoch)
    model_256_128_64_2(FrameSize, X, X_train, X_test, y_train, y_test, epoch)


if __name__ == '__main__':
    # Script entry point: preprocess dataset 6 and train for 20 epochs.
    df_train, labels = data_preprocess.process(6)
    run_model(df_train, labels, 20)
示例#4
0
def main():
    """Run the LRCN model on the Imperial HPC cluster.

    Loads the gene dataset (id 38) and trains the gene-based model with
    k-fold cross-validation for a fixed number of epochs.
    """
    n_epochs = 200
    features, targets = data_preprocess.process(38, gene_dataset=True)
    model_gene_based.run_model_kfold(features, targets, n_epochs)
示例#5
0
import pandas as pd

from loading_data import data_preprocess


def LoadLabel(path):
    """Load the ``AllLabels.csv`` table found under *path*.

    Parameters
    ----------
    path : str
        Prefix the file name is appended to verbatim, so it must already
        end with the path separator (e.g. ``'data/'``).

    Returns
    -------
    pandas.DataFrame
        The labels table indexed by its first column.
    """
    # index_col=0 is equivalent to the previous read_csv followed by
    # set_index(dt.columns[0], drop=True) two-step.
    return pd.read_csv(f"{path}AllLabels.csv", index_col=0)


# Script section: count resistant (1), susceptible (0) and other/missing
# label values for the first drug table returned by preprocessing.
df_train, dt = data_preprocess.process(2, 0)

dfPyrazinamide = dt[0]

arr = dfPyrazinamide.values.tolist()
zero = 0
one = 0
nan = 0
# Iterate rows directly instead of indexing by range(len(...)).
for row in arr:
    if row[0] == 1:
        one += 1
    elif row[0] == 0:
        zero += 1
    else:
        # Anything that is neither 0 nor 1 (typically NaN) lands here.
        nan += 1
# NOTE(review): the variable is named dfPyrazinamide but the printed header
# says "streptomycin" — one of the two is wrong; confirm which drug dt[0] is.
print("streptomycin")
print(one)
print(zero)
print(nan)
def train():
    """Fan LIME explanation jobs out over a 25-process worker pool.

    Loads the gene dataset (dataset 38) and then runs
    ``lime.main_function`` once for each index 0-24, in parallel.

    NOTE(review): a large body of commented-out alternative pipelines and
    feature-importance sweeps (base_approach.find_feature_importance with
    various k values and score files) was removed here for readability —
    recover it from version control if needed.
    """
    df_train, labels = data_preprocess.process(38, gene_dataset=True)

    # One task index per worker; range() replaces the manual append loop.
    task_indices = list(range(25))

    print(task_indices)

    with multiprocessing.Pool(processes=25) as pool:
        pool.map(lime.main_function, task_indices)
示例#7
0
def train():
    """Preprocess dataset 2, then run the one-vs-all model and ML baselines."""
    df_train, labels = data_preprocess.process(2)
    # Sanity check: show what type the preprocessing step returned.
    print(type(df_train))
    # limited=True presumably restricts the run (e.g. fewer targets) — TODO confirm.
    models.model_one_vs_all.run_model(df_train, labels, epochs, limited=True)
    ML_methods.model_run(df_train, labels)