def train_shuffle():
    """Run the k-fold gene-based pipeline once per shuffled index file.

    Preprocesses dataset 38 with ``shuffle_index=True`` for each of the
    four index files (0-3) and trains the k-fold model on each split.
    Relies on the module-level ``epochs`` value.
    """
    # NOTE(review): dataset id 38 and the count of 4 index files are
    # hard-coded experiment configuration — confirm before reuse.
    for index_file in range(4):
        df_train, labels = data_preprocess.process(
            38, shuffle_index=True, index_file=index_file)
        model_gene_based.run_model_kfold(
            df_train, labels, epochs, index=index_file)
def train():
    """Preprocess the gene dataset and launch the k-fold training run.

    Several alternative pipelines (deepAMR, SNP dataset, Bayesian
    optimisation, wide-and-deep, classical ML baselines) were trialled
    here and are disabled; only the gene-based k-fold run is active.
    Uses the module-level ``epochs`` value.
    """
    features, targets = data_preprocess.process(
        38, gene_dataset=True)  # SNP or GENE
    model_gene_based.run_model_kfold(features, targets, epochs)
# NOTE(review): this span appears to be the tail of a run_model(...)-style
# training routine whose `def` line was lost in extraction — X, y, FrameSize
# and epoch are presumably its parameters; confirm against the full file.

# Drop samples whose first label is neither 0.0 nor 1.0 (e.g. NaN),
# iterating backwards so deletions do not shift not-yet-visited indices.
for i in range(len(y) - 1, -1, -1):
    if y[i][0] != 0.0 and y[i][0] != 1.0:
        del y[i]
        del X[i]
# One-hot encode the binary labels.
y = to_categorical(y)
# Zero-pad every sample up to the next multiple of FrameSize, then fold it
# into a (FrameSize, len / FrameSize) matrix.
# NOTE(review): the guard below is always true — (len // FrameSize + 1) *
# FrameSize exceeds len even when len is already an exact multiple, so
# aligned samples gain one extra all-zero frame; confirm this is intended.
for i in range(0, len(X)):
    if len(X[i]) < ((len(X[i]) // FrameSize + 1) * FrameSize):
        # range() is evaluated once, so appending inside the loop does not
        # change the pad count.
        for j in range(
                0,
                (((len(X[i]) // FrameSize + 1) * FrameSize) - len(X[i]))):
            X[i].append(0)
    X[i] = np.reshape(X[i], (FrameSize, len(X[i]) // FrameSize))
X = np.array(X)
y = np.array(y)
# Hold out 10% of the framed data for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
model_CNN256_LSTM128_64_2(FrameSize, X, X_train, X_test, y_train, y_test,
                          epoch)
# model_256_128_64_2BS(FrameSize, X, X_train, X_test, y_train, y_test, epoch)
model_256_128_64_2(FrameSize, X, X_train, X_test, y_train, y_test, epoch)


if __name__ == '__main__':
    # Script entry point: preprocess dataset 6 and train for 20 epochs.
    df_train, labels = data_preprocess.process(6)
    run_model(df_train, labels, 20)
def main():
    """Entry point: run the LRCN (gene-based k-fold) model on Imperial HPC."""
    num_epochs = 200
    features, targets = data_preprocess.process(38, gene_dataset=True)
    model_gene_based.run_model_kfold(features, targets, num_epochs)
import pandas as pd

from loading_data import data_preprocess


def LoadLabel(path):
    """Load ``AllLabels.csv`` from *path*, indexed by its first column.

    Parameters
    ----------
    path : str
        Directory prefix (expected to end with a path separator) that
        contains ``AllLabels.csv``.

    Returns
    -------
    pandas.DataFrame
        The label table with its first column as the index.
    """
    labels = pd.read_csv(path + 'AllLabels' + '.csv')
    labels.set_index(labels.columns[0], inplace=True, drop=True)
    return labels


# Count resistant (1) / susceptible (0) / missing label values for the
# first drug frame returned by the preprocess step.
df_train, dt = data_preprocess.process(2, 0)
dfPyrazinamide = dt[0]
one = 0
zero = 0
nan = 0
for row in dfPyrazinamide.values.tolist():
    if row[0] == 1:
        one += 1
    elif row[0] == 0:
        zero += 1
    else:
        nan += 1
# NOTE(review): the variable is named dfPyrazinamide but the printed header
# says "streptomycin" — one of the two is wrong; confirm which drug dt[0]
# actually holds before trusting these counts.
print("streptomycin")
print(one)
print(zero)
print(nan)
def train(n_processes=25):
    """Preprocess the gene dataset and score features with LIME in parallel.

    Parameters
    ----------
    n_processes : int, optional
        Number of worker processes (and LIME job indices) to fan out.
        Defaults to 25, matching the original experiment.

    Notes
    -----
    A large body of dead commented-out code (Bayesian optimisation runs,
    k-fold training, ``base_approach.find_feature_importance`` sweeps over
    k=200/100/50/20 and per-split files) was removed; see version control
    history if an earlier pipeline needs to be revived.
    """
    # Preprocess is kept for its side effects / dataset materialisation;
    # its return values are not used by the LIME fan-out below.
    df_train, labels = data_preprocess.process(38, gene_dataset=True)
    # One job index per worker; presumably lime.main_function selects its
    # work slice from the index it receives — TODO confirm.
    job_indices = list(range(n_processes))
    print(job_indices)
    with multiprocessing.Pool(processes=n_processes) as pool:
        pool.map(lime.main_function, job_indices)
def train():
    """Run the limited one-vs-all model and the ML baselines on dataset 2.

    Uses the module-level ``epochs`` value for the one-vs-all run.
    """
    features, targets = data_preprocess.process(2)
    print(type(features))
    models.model_one_vs_all.run_model(features, targets, epochs, limited=True)
    ML_methods.model_run(features, targets)