import csv
import gc
import os
import time

import numpy as np
import pandas as pd
from keras.callbacks import (EarlyStopping, ModelCheckpoint,
                             ReduceLROnPlateau, TensorBoard)
from keras.layers import Input
from keras.models import Model
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

# Project-local helpers (Net, read_data_seg, DataGenerator, Config,
# add_compile, preprocess_y) and the dataset root `path` are assumed to be
# defined elsewhere in this repository.


def predict_dense_nets_kfold(test_path, train_x=None):
    pre_type = "db6"  # "sym"

    # Net structure: one DenseNet branch per 2560-sample segment.
    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=10, attention=False)
    model = Model(inputs=inputs_list, outputs=outputs)

    test_files = os.listdir(test_path)
    test_files.sort()

    print("*********read data for dense nets******")
    test_x = [
        read_data_seg(test_path, preprocess=True, n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]

    n_fold = 1
    n_classes = 10
    dense_blend_test = np.zeros(
        (len(test_files), n_fold, n_classes)).astype('float32')
    model_path = './official_densenet_model/'

    en_amount = 1
    for seed in range(en_amount):
        print("*********Start Dense Nets***************")
        for i in range(n_fold):
            print('fold: ', i + 1, ' training')
            model_name = "densenet_extend_weights-best_k{}_r{}_0806_30.hdf5".format(
                seed, i + 2)
            print(model_name)
            # Evaluate best trained model
            model.load_weights(model_path + model_name)
            dense_blend_test[:, i, :] = model.predict(test_x)
            '''
            blend_train = model.predict(train_x)
            gc.collect()
            csv_path = "./quarter_final/"
            pd.DataFrame(blend_train).to_csv(
                csv_path + "densenet_4block_10net_fold2.csv", index=None)
            '''

    del test_x
    gc.collect()
    print(" predict_dense_nets_kfold OK !!!!!!!!!")
    return dense_blend_test
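# Hedged usage sketch (not from the original repo): the blend returned by
# predict_dense_nets_kfold has shape (n_samples, n_fold, n_classes); a
# typical next step is to average the fold axis into one probability
# matrix. The test path and the 0.5 cut-off below are placeholders.
def _example_average_folds():
    blend = predict_dense_nets_kfold("./test_data/")
    probs = blend.mean(axis=1)         # equal weight per fold
    return (probs >= 0.5).astype(int)  # placeholder threshold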
def predict_dense_nets(test_path):
    pre_type = "sym"

    # Net Structure
    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=9)
    model = Model(inputs=inputs_list, outputs=outputs)

    test_files = os.listdir(test_path)
    test_files.sort()

    print("*********read data for dense nets******")
    test_x = [
        read_data_seg(test_path, preprocess=True, n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]

    n_fold = 3
    n_classes = 9
    dense_blend_test = np.zeros(
        (len(test_files), n_fold, n_classes)).astype('float32')
    model_path = './official_densenet_model/'

    en_amount = 1
    for seed in range(en_amount):
        print("*********Start Dense Nets***************")
        for i in range(n_fold):
            print('fold: ', i + 1, ' training')
            # Evaluate best trained model
            model.load_weights(
                model_path +
                'densenet_extend_weights-best_k{}_r{}.hdf5'.format(seed, i))
            dense_blend_test[:, i, :] = model.predict(test_x)

    del test_x
    gc.collect()
    return dense_blend_test
def predict_net_kfolds():
    pre_type = "sym"  # "sym"
    labels = pd.read_csv(path + "reference.csv")
    raw_IDs = labels["File_name"].values.tolist()
    IDs = {}
    IDs["sym"] = raw_IDs
    IDs["db4"] = [i + "_db4" for i in raw_IDs]
    IDs["db6"] = [i + "_db6" for i in raw_IDs]

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=9)
    model = Model(inputs=inputs_list, outputs=outputs)

    test_x = [
        read_data_seg(path, split='Val', preprocess=True, n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]
    model_path = './official_densenet_model/'
    model_name = 'densenet_extend_weights-best_one_fold.hdf5'

    en_amount = 1
    for seed in range(en_amount):
        print("************************")
        n_fold = 3  # 3
        n_classes = 9
        kfold = StratifiedKFold(n_splits=n_fold, shuffle=True,
                                random_state=seed)
        kf = kfold.split(IDs[pre_type], labels['label1'])

        blend_train = np.zeros(
            (6500, n_fold, n_classes)).astype('float32')  # len(train_x)
        blend_test = np.zeros(
            (500, n_fold, n_classes)).astype('float32')  # len(test_x)

        count = 0
        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()
            tr_IDs = np.array(IDs[pre_type])  # [index_train]
            # val_IDs = np.array(IDs[pre_type])[index_valid]
            print(tr_IDs.shape)

            X = np.empty((tr_IDs.shape[0], 10, 2560, 12))
            for j, ID in enumerate(tr_IDs):
                X[j, ] = np.load("training_data/" + ID + ".npy")
            # X_tr = [(X[:, i] - np.mean(X[:, i])) / np.std(X[:, i]) for i in range(10)]
            X_tr = [X[:, k] for k in range(10)]
            del X

            # Evaluate best trained model
            model.load_weights(
                model_path +
                'densenet_extend_weights-best_k{}_r{}_0609_30.hdf5'.format(
                    seed, i))
            blend_train[:, i, :] = model.predict(X_tr)
            blend_test[:, i, :] = model.predict(test_x)
            del X_tr
            gc.collect()
            count += 1

        index = np.arange(6500)
        y_train = preprocess_y(labels, index)

        # Weighted blend of the three folds' train predictions.
        train_y = (0.1 * blend_train[:, 0, :] + 0.1 * blend_train[:, 1, :] +
                   0.8 * blend_train[:, 2, :])

        # Per-class threshold search on the train blend (see the
        # search_thresholds sketch below for the same idea in isolation).
        threshold = np.arange(0.1, 0.9, 0.1)
        acc = []
        accuracies = []
        best_threshold = np.zeros(train_y.shape[1])
        for i in range(train_y.shape[1]):
            y_prob = np.array(train_y[:, i])
            for j in threshold:
                y_pred = [1 if prob >= j else 0 for prob in y_prob]
                acc.append(f1_score(y_train[:, i], y_pred, average='macro'))
            acc = np.array(acc)
            index = np.where(acc == acc.max())
            accuracies.append(acc.max())
            best_threshold[i] = threshold[index[0][0]]
            acc = []
        print("best_threshold :", best_threshold)

        y_pred = np.array([[
            1 if train_y[i, j] >= best_threshold[j] else 0
            for j in range(train_y.shape[1])
        ] for i in range(len(train_y))])
        print(" train data f1_score :",
              f1_score(y_train, y_pred, average='macro'))
        for i in range(9):
            print("f1 score of ab {} is {}".format(
                i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

        # Apply the same blend weights and thresholds to the test folds.
        out = (0.1 * blend_test[:, 0, :] + 0.1 * blend_test[:, 1, :] +
               0.8 * blend_test[:, 2, :])
        y_pred_test = np.array([[
            1 if out[i, j] >= best_threshold[j] else 0
            for j in range(out.shape[1])
        ] for i in range(len(out))])

        classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
        pred = []
        for j in range(y_pred_test.shape[0]):
            pred.append(
                [classes[i] for i in range(9) if y_pred_test[j][i] == 1])

        val_dataset_path = path + "/Val/"
        val_files = os.listdir(val_dataset_path)
        val_files.sort()
        with open('answers_densenet_{}_0608.csv'.format(pre_type),
                  'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                'File_name', 'label1', 'label2', 'label3', 'label4',
                'label5', 'label6', 'label7', 'label8'
            ])
            count = 0
            for file_name in val_files:
                if file_name.endswith('.mat'):
                    record_name = file_name[:-len('.mat')]
                    answer = [record_name]
                    result = pred[count]
                    answer.extend(result)
                    for i in range(8 - len(result)):
                        answer.append('')
                    count += 1
                    writer.writerow(answer)
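# The per-class threshold search above (repeated in predict_net_one_fold
# below) can be read as one small routine. A minimal sketch over the same
# grid; the helper name is hypothetical, and it uses plain binary F1 where
# the original calls f1_score(..., average='macro') on each class column.
def search_thresholds(probs, y_true, grid=np.arange(0.1, 0.9, 0.1)):
    """Per class, pick the cut-off in `grid` that maximizes F1."""
    best = np.zeros(probs.shape[1])
    for c in range(probs.shape[1]):
        scores = [f1_score(y_true[:, c], (probs[:, c] >= t).astype(int))
                  for t in grid]
        best[c] = grid[int(np.argmax(scores))]
    return best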
def predict_net_one_fold():
    pre_type = "sym"
    labels = pd.read_csv(path + "reference.csv")
    raw_IDs = labels["File_name"].values.tolist()
    IDs = {}
    IDs["sym"] = raw_IDs
    IDs["db4"] = [i + "_db4" for i in raw_IDs]
    IDs["db6"] = [i + "_db6" for i in raw_IDs]

    X = np.empty((6500, 10, 2560, 12))
    for i, ID in enumerate(IDs[pre_type]):
        X[i, ] = np.load("training_data/" + ID + ".npy")
    # Per-segment standardization; the raw split would be
    # [X[:, 0], X[:, 1], ..., X[:, 9]].
    train_x = [(X[:, i] - np.mean(X[:, i])) / np.std(X[:, i])
               for i in range(10)]

    index = np.arange(6500)
    y_train = preprocess_y(labels, index)

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=9)
    model = Model(inputs=inputs_list, outputs=outputs)
    # print(model.summary())

    model_path = './official_densenet_model/'
    model_name = 'densenet_extend_weights-best_one_fold_0607.hdf5'
    model.load_weights(model_path + model_name)
    blend_train = model.predict(train_x)

    # Per-class threshold search on the train predictions.
    threshold = np.arange(0.1, 0.9, 0.1)
    acc = []
    accuracies = []
    best_threshold = np.zeros(blend_train.shape[1])
    for i in range(blend_train.shape[1]):
        y_prob = np.array(blend_train[:, i])
        for j in threshold:
            y_pred = [1 if prob >= j else 0 for prob in y_prob]
            acc.append(f1_score(y_train[:, i], y_pred, average='macro'))
        acc = np.array(acc)
        index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold[i] = threshold[index[0][0]]
        acc = []
    print("best_threshold :", best_threshold)

    y_pred = np.array([[
        1 if blend_train[i, j] >= best_threshold[j] else 0
        for j in range(blend_train.shape[1])
    ] for i in range(len(blend_train))])
    print(" train data f1_score :",
          f1_score(y_train, y_pred, average='macro'))
    for i in range(9):
        print("f1 score of ab {} is {}".format(
            i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

    test_x = [
        read_data_seg(path, split='Val', preprocess=True, n_index=i,
                      pre_type=pre_type) for i in range(net_num)
    ]
    out = model.predict(test_x)
    y_pred_test = np.array([[
        1 if out[i, j] >= best_threshold[j] else 0
        for j in range(out.shape[1])
    ] for i in range(len(out))])

    classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    pred = []
    for j in range(y_pred_test.shape[0]):
        pred.append([classes[i] for i in range(9) if y_pred_test[j][i] == 1])

    val_dataset_path = path + "/Val/"
    val_files = os.listdir(val_dataset_path)
    val_files.sort()
    with open('answers_densenet_{}.csv'.format(pre_type), 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([
            'File_name', 'label1', 'label2', 'label3', 'label4', 'label5',
            'label6', 'label7', 'label8'
        ])
        count = 0
        for file_name in val_files:
            if file_name.endswith('.mat'):
                record_name = file_name[:-len('.mat')]
                answer = [record_name]
                result = pred[count]
                answer.extend(result)
                for i in range(8 - len(result)):
                    answer.append('')
                count += 1
                writer.writerow(answer)
def training_net_kfolds():
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"
    train_files = os.listdir(train_dataset_path)
    train_files.sort()
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    labels = pd.read_csv(path + "reference.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    data_info = pd.read_csv(path + "data_info.csv")

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=9)
    model = Model(inputs=inputs_list, outputs=outputs)
    # print(model.summary())

    # Triple the ID list with the db4/db6 wavelet-augmented copies.
    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs
    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))

    # Parameters
    params = {
        'dim': (10, 2560),
        'batch_size': 64,
        'n_classes': 9,
        'n_channels': 12,
        'shuffle': True
    }
    en_amount = 1
    model_path = './official_densenet_model/'

    for seed in range(en_amount):
        print("************************")
        n_fold = 3
        n_classes = 9
        kfold = StratifiedKFold(n_splits=n_fold, shuffle=True,
                                random_state=1234)
        kf = kfold.split(all_IDs, all_train_labels)

        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()
            tr_IDs = np.array(all_IDs)[index_train]
            val_IDs = np.array(all_IDs)[index_valid]
            print(tr_IDs.shape)

            # Generators
            training_generator = DataGenerator(tr_IDs, labels, **params)
            validation_generator = DataGenerator(val_IDs, labels, **params)

            checkpointer = ModelCheckpoint(
                filepath=model_path +
                'densenet_extend_weights-best_k{}_r{}_0609_30.hdf5'.format(
                    seed, i),
                monitor='val_fmeasure',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='max')  # val_fmeasure
            reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                                       factor=0.5,
                                       patience=2,
                                       verbose=1,
                                       min_delta=1e-4,
                                       mode='max')
            # Defined but not included in callback_lists below.
            earlystop = EarlyStopping(monitor='val_fmeasure', patience=10)

            config = Config()
            add_compile(model, config)
            callback_lists = [checkpointer, reduce]
            history = model.fit_generator(
                generator=training_generator,
                validation_data=validation_generator,
                use_multiprocessing=False,
                epochs=30,  # 40 # 20
                verbose=1,
                callbacks=callback_lists)
def training_net_one_fold():
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"
    train_files = os.listdir(train_dataset_path)
    train_files.sort()
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    labels = pd.read_csv(path + "reference.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    data_info = pd.read_csv(path + "data_info.csv")

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=9)
    model = Model(inputs=inputs_list, outputs=outputs)
    # print(model.summary())

    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs
    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))

    # Parameters
    params = {
        'dim': (10, 2560),
        'batch_size': 64,
        'n_classes': 9,
        'n_channels': 12,
        'shuffle': True
    }
    en_amount = 1
    model_path = './official_densenet_model/'

    index = np.arange(23109)
    np.random.shuffle(index)
    index_train = index[:16176]
    index_valid = index[16176:]
    tr_IDs = np.array(all_IDs)[index_train]
    val_IDs = np.array(all_IDs)[index_valid]
    print(tr_IDs.shape)
    print(val_IDs.shape)

    # Generators
    training_generator = DataGenerator(tr_IDs, labels, **params)
    validation_generator = DataGenerator(val_IDs, labels, **params)

    checkpointer = ModelCheckpoint(
        filepath=model_path +
        'densenet_extend_weights-best_one_fold_0607.hdf5',
        monitor='val_fmeasure',
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
        mode='max')  # val_fmeasure
    reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                               factor=0.5,
                               patience=2,
                               verbose=1,
                               min_delta=1e-4,
                               mode='max')
    earlystop = EarlyStopping(monitor='val_fmeasure', patience=5)

    config = Config()
    add_compile(model, config)
    callback_lists = [checkpointer, reduce]
    history = model.fit_generator(generator=training_generator,
                                  validation_data=validation_generator,
                                  use_multiprocessing=False,
                                  epochs=30,
                                  verbose=1,
                                  callbacks=callback_lists)
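# DataGenerator is project-local and not shown here. As a reference for
# the interface the training functions rely on, a minimal hedged sketch of
# a keras.utils.Sequence generator matching the `params` dict; the batching
# and label-lookup details are assumptions, not the repo's implementation.
class MinimalECGGenerator(Sequence):
    """Hypothetical stand-in for the project's DataGenerator."""

    def __init__(self, list_IDs, labels, dim=(10, 2560), batch_size=64,
                 n_classes=9, n_channels=12, shuffle=True):
        self.list_IDs = list(list_IDs)
        self.labels = labels  # assumed: multi-hot targets keyed by ID
        self.dim = dim
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, idx):
        ids = self.list_IDs[idx * self.batch_size:(idx + 1) * self.batch_size]
        X = np.stack([np.load("training_data/" + i + ".npy") for i in ids])
        y = np.stack([self.labels[i] for i in ids])
        # The model takes ten (2560, 12) inputs, one per segment.
        return [X[:, k] for k in range(self.dim[0])], y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.list_IDs)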
# Variant of training_net_kfolds above that additionally mixes in the
# quarter-final data (renamed so it no longer shadows that definition).
def training_net_kfolds_add_quarter():
    train_dataset_path = path + "/Train/"
    val_dataset_path = path + "/Val/"
    train_files = os.listdir(train_dataset_path)
    train_files.sort()
    val_files = os.listdir(val_dataset_path)
    val_files.sort()

    labels = pd.read_csv(path + "REFERENCE.csv")
    labels_en = pd.read_csv(path + "kfold_labels_en.csv")
    # data_info = pd.read_csv(path + "data_info.csv")
    quarter_labels = pd.read_csv(
        "/media/uuser/data/final_run/pro_reference.csv")

    input_size = (2560, 12)
    net_num = 10
    inputs_list = [Input(shape=input_size) for _ in range(net_num)]
    net = Net()
    outputs = net.nnet(inputs_list, 0.5, num_classes=10, attention=False)
    model = Model(inputs=inputs_list, outputs=outputs)
    # print(model.summary())

    raw_IDs = labels_en["File_name"].values.tolist()
    extend_db4_IDs = [i + "_db4" for i in raw_IDs]
    extend_db6_IDs = [i + "_db6" for i in raw_IDs]
    all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs
    train_labels = labels_en["label1"].values
    all_train_labels = np.hstack((train_labels, train_labels, train_labels))

    # Parameters
    params = {
        'dim': (10, 2560),
        'batch_size': 64,
        'n_classes': 10,
        'n_channels': 12,
        'shuffle': True
    }
    en_amount = 1
    model_path = './official_densenet_model/'

    for seed in range(en_amount):
        print("************************")
        n_fold = 3
        n_classes = 10

        # Pre-compute the quarter-final folds, oversampling minority
        # classes 4 and 7 inside each fold's training split.
        quarter_tr_IDs = []
        quarter_tr_IDs_db4 = []
        quarter_tr_IDs_db6 = []
        quarter_val_IDs = []
        quarter_val_IDs_db4 = []
        quarter_val_IDs_db6 = []
        quarter_kfold = StratifiedKFold(n_splits=n_fold, shuffle=True,
                                        random_state=1234)
        quarter_kf = quarter_kfold.split(
            quarter_labels["File_name"].values.tolist(),
            quarter_labels["label1"].values)
        for quarter_i, (quarter_index_train,
                        quarter_index_valid) in enumerate(quarter_kf):
            print('quarter_fold: ', quarter_i + 1, ' training')
            quarter_tr_IDs.append(
                quarter_labels["File_name"]
                .values[quarter_index_train].tolist())
            quarter_val_IDs.append(
                quarter_labels["File_name"]
                .values[quarter_index_valid].tolist())
            # Duplicate class-4 IDs four extra times and class-7 IDs twice
            # within this fold's training split.
            for j in range(4):
                for ids in quarter_labels[
                        quarter_labels.label1 == 4]["File_name"]:
                    if ids in quarter_tr_IDs[quarter_i]:
                        quarter_tr_IDs[quarter_i].append(ids)
            for j in range(2):
                for ids in quarter_labels[
                        quarter_labels.label1 == 7]["File_name"]:
                    if ids in quarter_tr_IDs[quarter_i]:
                        quarter_tr_IDs[quarter_i].append(ids)
            quarter_tr_IDs_db4.append(
                [ids + "_db4" for ids in quarter_tr_IDs[quarter_i]])
            quarter_tr_IDs_db6.append(
                [ids + "_db6" for ids in quarter_tr_IDs[quarter_i]])
            quarter_val_IDs_db4.append(
                [ids + "_db4" for ids in quarter_val_IDs[quarter_i]])
            quarter_val_IDs_db6.append(
                [ids + "_db6" for ids in quarter_val_IDs[quarter_i]])

        kfold = StratifiedKFold(n_splits=n_fold, shuffle=True,
                                random_state=1234)
        # kf = kfold.split(all_IDs, all_train_labels)
        kf = kfold.split(labels["File_name"].values.tolist(),
                         labels["label1"].values)
        for i, (index_train, index_valid) in enumerate(kf):
            print('fold: ', i + 1, ' training')
            t = time.time()
            print(index_train)
            # tr_IDs = np.array(all_IDs)[index_train]
            # val_IDs = np.array(all_IDs)[index_valid]
            # print(tr_IDs.shape)
            tr_IDs = labels["File_name"].values[index_train].tolist()
            val_IDs = labels["File_name"].values[index_valid].tolist()
            # Same duplication-based oversampling for the main data:
            # class 4 four times, class 7 twice, class 9 once (see the
            # oversample_ids sketch below for the pattern in isolation).
            for j in range(4):
                for ids in labels[labels.label1 == 4]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)
            for j in range(2):
                for ids in labels[labels.label1 == 7]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)
            for j in range(1):
                for ids in labels[labels.label1 == 9]["File_name"]:
                    if ids in tr_IDs:
                        tr_IDs.append(ids)

            tr_IDs_db4 = [ids + "_db4" for ids in tr_IDs]
            tr_IDs_db6 = [ids + "_db6" for ids in tr_IDs]
            val_IDs_db4 = [ids + "_db4" for ids in val_IDs]
            val_IDs_db6 = [ids + "_db6" for ids in val_IDs]
            tr_IDs = (tr_IDs + tr_IDs_db4 + tr_IDs_db6 + quarter_tr_IDs[i] +
                      quarter_tr_IDs_db4[i] + quarter_tr_IDs_db6[i])
            val_IDs = (val_IDs + val_IDs_db4 + val_IDs_db6 +
                       quarter_val_IDs[i] + quarter_val_IDs_db4[i] +
                       quarter_val_IDs_db6[i])
            print("tr_IDs : ", len(tr_IDs))
            print("val_IDs : ", len(val_IDs))

            # Generators
            training_generator = DataGenerator(tr_IDs, labels,
                                               quarter_labels, **params)
            validation_generator = DataGenerator(val_IDs, labels,
                                                 quarter_labels, **params)

            checkpointer = ModelCheckpoint(
                filepath=model_path +
                'densenet_extend_weights-best_k{}_r{}_0807_30_add_quarter.hdf5'
                .format(seed, i),
                monitor='val_fmeasure',
                verbose=1,
                save_best_only=True,
                save_weights_only=True,
                mode='max')  # val_fmeasure
            reduce = ReduceLROnPlateau(monitor='val_fmeasure',
                                       factor=0.5,
                                       patience=2,
                                       verbose=1,
                                       min_delta=1e-5,
                                       mode='max')
            earlystop = EarlyStopping(monitor='val_fmeasure',
                                      mode="max",
                                      patience=6,
                                      restore_best_weights=True)
            # Defined but not included in callback_lists below.
            tensorboard = TensorBoard(log_dir="./logs")

            config = Config()
            add_compile(model, config)
            callback_lists = [checkpointer, reduce, earlystop]
            history = model.fit_generator(
                generator=training_generator,
                validation_data=validation_generator,
                use_multiprocessing=False,
                epochs=30,  # 40 # 20
                verbose=1,
                callbacks=callback_lists)
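# The duplication-based oversampling above repeats one pattern per class.
# A compact equivalent of that pattern, factored as a helper; the name and
# signature are hypothetical, not part of the repo.
def oversample_ids(id_list, labels_df, class_id, times):
    """Append `times` extra copies of every ID of class `class_id` that is
    already present in `id_list` (in place)."""
    present = set(id_list)
    for ids in labels_df[labels_df.label1 == class_id]["File_name"]:
        if ids in present:
            id_list.extend([ids] * times)

# e.g. oversample_ids(tr_IDs, labels, class_id=4, times=4)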
# Module-level evaluation setup; `labels` and `path` are assumed to be
# defined earlier in this script, as in the functions above. The ten
# individual inputs0..inputs9 definitions (inputs0 was missing here) are
# collapsed into the list-comprehension form used throughout this file.
inputs_list = [Input(shape=(2560, 12)) for _ in range(10)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=9)
model = Model(inputs=inputs_list, outputs=outputs)
# print(model.summary())

# Multi-hot encode the up-to-eight class labels attached to each record.
bin_label = np.zeros((6500, 9))
for i in range(labels.shape[0]):
    label_nona = labels.loc[i].dropna()
    for j in range(1, label_nona.shape[0]):
        bin_label[i, int(label_nona[j])] = 1

cv_pred_all = 0
en_amount = 1
labels_en = pd.read_csv(path + "kfold_labels_en.csv")
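# A small hedged illustration (demo data, not from the repo) of what the
# multi-hot encoding above produces for reference rows of the form
# (File_name, label1, ..., label8):
def _demo_bin_label():
    demo = pd.DataFrame({"File_name": ["A0001", "A0002"],
                         "label1": [0, 2],
                         "label2": [5, np.nan]})
    demo_bin = np.zeros((2, 9))
    for i in range(demo.shape[0]):
        row = demo.loc[i].dropna()
        for j in range(1, row.shape[0]):
            demo_bin[i, int(row[j])] = 1
    # demo_bin[0] -> [1, 0, 0, 0, 0, 1, 0, 0, 0]  (classes 0 and 5)
    # demo_bin[1] -> [0, 0, 1, 0, 0, 0, 0, 0, 0]  (class 2)
    return demo_bin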