def train(aug_no, model_mode='mymodel', set_epochs=10, do_es=False):
    """Fit a model on the ``da_concat_<aug_no>`` data and export model and history.

    Relies on the module-level names ``cwd`` (base directory) and
    ``validation_dir`` (validation data directory).

    Args:
        aug_no: Suffix selecting the training directory
            ``da_concat_<aug_no>``; also used in the exported file names.
        model_mode: ``'mymodel'`` builds the model with ``buildMyModel()``;
            ``'tlearn'`` builds it with ``buildTlearnModel(base='mnv1')``.
        set_epochs: Epoch count handed to ``model.fit``.
        do_es: When truthy, train with an ``EarlyStopping`` callback
            (``val_loss``, patience 5) and log under ``log_with_es``;
            otherwise train without callbacks and log under ``log``.
    """
    source_dir = os.path.join(cwd, "da_concat_{}".format(aug_no))
    train_data, train_label = inputDataCreator(
        source_dir, 224, normalize=True, one_hot=True)
    validation_data, validation_label = inputDataCreator(
        validation_dir, 224, normalize=True, one_hot=True)
    print("train data shape : ", train_data.shape)
    print("train label shape : ", train_label.shape)

    # Model input geometry is read off the loaded array
    # (assumes a 4-D array with size on axis 1, channels on axis 3 - TODO confirm).
    side = train_data.shape[1]
    print("INPUT_SIZE: ", side)
    depth = train_data.shape[3]
    print("set channel : ", depth)
    n_per_batch = 10
    print("set batch_size : ", n_per_batch)

    handler = ModelHandler(side, depth)
    if model_mode == 'mymodel':
        model = handler.buildMyModel()
    elif model_mode == 'tlearn':
        model = handler.buildTlearnModel(base='mnv1')
    model.summary()

    # Keras takes either a list of callbacks or None.
    callbacks = (
        [EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')]
        if do_es else None
    )

    history = model.fit(
        train_data,
        train_label,
        batch_size=n_per_batch,
        epochs=set_epochs,
        validation_data=(validation_data, validation_label),
        callbacks=callbacks,
        verbose=1,
    )

    # Runs with and without early stopping are logged in separate trees.
    log_root = os.path.join(cwd, 'log_with_es' if do_es else 'log')
    os.makedirs(log_root, exist_ok=True)
    run_dir = os.path.join(log_root, '{}_{}'.format(aug_no, model_mode))
    os.makedirs(run_dir, exist_ok=True)

    # Export the trained model (architecture + weights).
    model_path = os.path.join(
        run_dir, '{}_{}_model.h5'.format(aug_no, model_mode))
    model.save(model_path)

    # Export the per-epoch metric history.
    history_path = os.path.join(
        run_dir, '{}_{}_history.pkl'.format(aug_no, model_mode))
    with open(history_path, 'wb') as sink:
        pickle.dump(history.history, sink)

    print("export logs in ", run_dir)
def main():
    """Train on the ``small_721`` dataset via directory generators, then evaluate and log.

    The dataset root is ``<cwd>/../../../datasets/small_721``. After training
    with early stopping, the model is evaluated on the test generator, saved
    under ``<cwd>/../outputs/models/<YYYYMMDD>/model.h5``, and a summary of
    the run is written to ``<cwd>/../outputs/logs/<YYYYMMDD>/result.csv``.

    Relies on the module-level names ``target_size``, ``batch_size``,
    ``input_size``, ``channel`` and ``set_epochs``.
    """
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    sub_prj_root = os.path.dirname(sub_prj)
    prj_root = os.path.dirname(sub_prj_root)

    data_dir = os.path.join(prj_root, "datasets")
    data_src = os.path.join(data_dir, "small_721")
    print("\ndata source: ", data_src)

    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)
    train_generator = data_gen.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')
    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')
    # The test generator is not shuffled so predictions and labels line up.
    test_generator = data_gen.flow_from_directory(
        test_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode='categorical')

    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=[es],
                                  verbose=1)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    # evaluate ----------
    print("\nevaluate sequence...")
    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[-1])

    # Only whole batches are evaluated; a trailing partial batch is dropped.
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # logging and detail outputs -----
    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    # Fixed: the original created log_dir a second time here.
    os.makedirs(model_log_dir, exist_ok=True)

    now = datetime.datetime.now()
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir,
                                       "{0:%Y%m%d}".format(now))
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)

    print("\npredict sequence...")
    pred = model.predict_generator(test_generator,
                                   steps=test_steps,
                                   verbose=1)

    # Collect the one-hot labels for the same batches that were predicted;
    # stack once instead of re-stacking after every batch.
    label_batches = [next(test_generator)[1] for _ in range(test_steps)]
    test_label = np.vstack(label_batches)

    # Column 0 is treated as 'cat' and column 1 as 'dog'.
    class_names = ('cat', 'dog')
    label_name_list = [class_names[i] for i in np.argmax(test_label, axis=1)]

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    # Assigning a plain list makes a prediction/label length mismatch fail
    # loudly instead of silently producing NaN labels.
    df_pred['label'] = label_name_list
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[~df_pred['collect']].index.tolist()
    collect = df_pred[df_pred['collect']].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print(" wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print(" collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    # save history
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }
    print(save_dict)

    df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())
    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport history in ", csv_file)
def main(N, LEARN_PATH, MODE, BUILD_MODEL, EPOCHS=60, BATCH_SIZE=20, FINE_TUNE_AT=81):
    """Run 5-fold stratified cross-validation and export per-fold metrics as CSV.

    For every fold a fresh model is trained, used to predict the held-out
    test split, evaluated, and its metrics are appended as a new column of
    the result table. The table is written to
    ``<dirname(cwd)>/flog/sample_<N>_<MODE>_<BUILD_MODEL>_result.csv``.

    Relies on the module-level name ``cwd``.

    Args:
        N: Sample identifier; only used in the CSV file name.
        LEARN_PATH: Directory containing ``natural`` and, for
            ``MODE == 'auged'``, ``auged`` data directories.
        MODE: ``'auged'`` halves ``EPOCHS`` and adds augmented samples to the
            training split; any other value trains on natural data only.
        BUILD_MODEL: ``'mymodel'`` trains the model from ``buildMyModel()``;
            ``'tlearn'`` runs a 10-epoch warm-up on a frozen base followed by
            fine tuning.
        EPOCHS: Epochs for the main training run.
        BATCH_SIZE: Batch size for every ``model.fit`` call.
        FINE_TUNE_AT: Forwarded to ``ModelHandler.setFineTune``.
    """
    # Labels stay as class indices here: StratifiedKFold.split consumes
    # them, and they are one-hot encoded per fold below.
    total_data, total_label = inputDataCreator(
        os.path.join(LEARN_PATH, "natural"), 224, normalize=True)
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if MODE == 'auged':
        auged_dir = os.path.join(LEARN_PATH, "auged")
        EPOCHS = EPOCHS // 2
        total_auged_data, total_auged_label = inputDataCreator(
            auged_dir, 224, normalize=True, one_hot=True)
        print("\n total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    # The output directory does not depend on the fold, so create it once.
    log_dir = os.path.join(os.path.dirname(cwd), "flog")
    os.makedirs(log_dir, exist_ok=True)

    class_names = ('cat', 'dog')
    skf = StratifiedKFold(n_splits=5)

    for k, (traval_idx, test_idx) in enumerate(
            skf.split(total_data, total_label)):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")
        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]

        # Class index -> one-hot row (two classes).
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=3 / 4,
            validation_rate=1 / 4,
            test_rate=0)

        if MODE == 'auged':
            print("\nadd auged data to train_data...")
            # NOTE(review): assumes the augmented arrays are index-aligned
            # with the natural ones - confirm against inputDataCreator.
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]
            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=3 / 4,
                validation_rate=1 / 4,
                test_rate=0)
            print(" append auged data: ", auged_train_data.shape)
            print("\n concatnate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print(" Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           restore_best_weights=True,
                           mode='auto')

        print("set epochs: ", EPOCHS)

        if BUILD_MODEL == 'mymodel':
            model = mh.buildMyModel()
            # normal train ----------
            print("\ntraining sequence start .....")
            start = time.time()
            # Fixed: the keyword was misspelled "vlidation_data", which made
            # fit() raise TypeError and left early stopping without val_loss.
            history = model.fit(train_data,
                                train_label,
                                BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        elif BUILD_MODEL == 'tlearn':
            # Keep a handle on the base model so its weights can be
            # unfrozen later for fine tuning.
            base_model = mh.buildMnv1Base()
            base_model.trainable = False
            model = mh.addChead(base_model)

            print("\ntraining sequence start .....")
            start = time.time()

            # warm up -----
            print("\nwarm up sequence .....")
            model.summary()
            # The warm-up history is not needed afterwards, so it is not kept.
            model.fit(train_data,
                      train_label,
                      BATCH_SIZE,
                      epochs=10,
                      validation_data=(validation_data, validation_label),
                      callbacks=[es],
                      verbose=2)

            # fine tuning -----
            print("\nfine tuning.....")
            mh.setFineTune(base_model, model, FINE_TUNE_AT)
            model.summary()
            history = model.fit(train_data,
                                train_label,
                                BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        # training end
        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=2)

        # Column 0 is treated as 'cat' and column 1 as 'dog'.
        label_name_list = [
            class_names[i] for i in np.argmax(test_label, axis=1)
        ]

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = label_name_list
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[~df_pred['collect']].index.tolist()
        collect = df_pred[df_pred['collect']].index.tolist()

        print(df_pred)
        print("\nwrong recognized indeices are ", confuse)
        print(" wrong recognized amount is ", len(confuse))
        print("\ncollect recognized indeices are ", collect)
        print(" collect recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=2)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {
            'last_loss': losses[-1],
            'last_acc': accs[-1],
            'last_val_loss': val_losses[-1],
            'last_val_acc': val_accs[-1],
            'n_confuse': len(confuse),
            'eval_loss': eval_res[0],
            'eval_acc': eval_res[1],
            'elapsed_time': elapsed_time,
        }
        print(save_dict)

        # One column per fold, one row per metric.
        if k == 0:
            df_result = pd.DataFrame(save_dict.values(),
                                     index=save_dict.keys())
        else:
            df_result[k] = pd.Series(save_dict)
        print(df_result)

        # Release per-fold objects before the next fold allocates its own.
        del traval_data, traval_label
        if MODE == 'auged':
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label
        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label
        # Fixed: the original also deleted "_history", which is never bound
        # in 'mymodel' mode and raised NameError at the end of fold 0.
        del model, history

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

    csv_file = os.path.join(
        log_dir, "sample_{}_{}_{}_result.csv".format(N, MODE, BUILD_MODEL))
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    """Train and evaluate one experiment through Keras data generators.

    Relies on the module-level name ``cwd``.

    Args:
        data_mode: ``'native'`` loads ``experiment_<no>`` into memory, splits
            it with ``dataSplit`` and feeds it through ``datagen.flow``;
            ``'auged'`` halves ``set_epochs`` and reads the ``train`` /
            ``validation`` / ``test`` directories under
            ``concat_experiment_<no>``.
        model_mode: ``'mymodel'`` or ``'tlearn'`` (model built by
            ``ModelHandler``).
        no: Experiment number used in the data directory name.
        set_epochs: Number of training epochs (halved in ``'auged'`` mode).
        do_es: When truthy, train with an ``EarlyStopping`` callback on
            ``val_loss`` and use the ``log_with_es`` log directory.

    Returns:
        dict: Last-epoch training/validation loss and accuracy, the number
        of misclassified test samples, test loss/accuracy and the elapsed
        training time in seconds.
    """
    batch_size = 10

    if data_mode == 'native':
        data_dir = os.path.join(cwd, "experiment_{}".format(no))
        total_data, total_label = inputDataCreator(data_dir,
                                                   224,
                                                   normalize=True,
                                                   one_hot=True)
        (train_data, train_label,
         validation_data, validation_label,
         test_data, test_label) = dataSplit(total_data, total_label)

        datagen = ImageDataGenerator()
        train_generator = datagen.flow(train_data,
                                       train_label,
                                       batch_size=batch_size,
                                       shuffle=True)
        validation_generator = datagen.flow(validation_data,
                                            validation_label,
                                            batch_size=batch_size,
                                            shuffle=True)
        # Fixed: the test generator must not shuffle, otherwise the
        # predictions below cannot be matched against test_label.
        test_generator = datagen.flow(test_data,
                                      test_label,
                                      batch_size=batch_size,
                                      shuffle=False)

    elif data_mode == 'auged':
        set_epochs = int(set_epochs / 2)
        data_dir = os.path.join(cwd, "concat_experiment_{}".format(no))
        train_dir = os.path.join(data_dir, "train")
        validation_dir = os.path.join(data_dir, "validation")
        test_dir = os.path.join(data_dir, "test")

        datagen = ImageDataGenerator(rescale=1 / 255.0)
        train_generator = datagen.flow_from_directory(
            train_dir,
            target_size=(224, 224),
            batch_size=batch_size,
            shuffle=True,
            class_mode='categorical')
        validation_generator = datagen.flow_from_directory(
            validation_dir,
            target_size=(224, 224),
            batch_size=batch_size,
            shuffle=True,
            class_mode='categorical')
        # Fixed: unshuffled for the same reason as in 'native' mode.
        test_generator = datagen.flow_from_directory(
            test_dir,
            target_size=(224, 224),
            batch_size=batch_size,
            shuffle=False,
            class_mode='categorical')

    # Peek at one batch to learn the input geometry for the model.
    data_sample, _ = next(train_generator)
    input_size = data_sample.shape[1]
    channel = data_sample.shape[3]
    print("input_size: {} | channel: {}".format(input_size, channel))
    print("set epochs: ", set_epochs)

    mh = ModelHandler(input_size, channel)
    if model_mode == 'mymodel':
        model = mh.buildMyModel()
    elif model_mode == 'tlearn':
        model = mh.buildTlearnModel(base='mnv1')
    model.summary()

    if do_es:
        es = [EarlyStopping(monitor='val_loss',
                            patience=5,
                            verbose=1,
                            mode='auto')]
    else:
        es = None

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    # Fixed: the early-stopping callback was built but never handed to
    # fit_generator, so do_es had no effect on training.
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=es,
                                  verbose=1)
    elapsed_time = time.time() - start

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    if do_es:
        log_dir = os.path.join(cwd, "log_with_es")
    else:
        log_dir = os.path.join(cwd, "log")
    os.makedirs(log_dir, exist_ok=True)

    print("\npredict sequence...")
    # Without "steps" every batch of the generator is predicted.
    pred = model.predict_generator(test_generator, verbose=1)

    if data_mode == 'auged':
        # Fixed: gather labels for every batch (including a trailing
        # partial one) by index, so they cover exactly the samples that
        # were predicted and do not depend on the iterator position.
        test_label = np.vstack(
            [test_generator[i][1] for i in range(len(test_generator))])

    # Column 0 is treated as 'cat' and column 1 as 'dog'.
    class_names = ('cat', 'dog')
    label_name_list = [class_names[i] for i in np.argmax(test_label, axis=1)]

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = label_name_list
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[~df_pred['collect']].index.tolist()
    collect = df_pred[df_pred['collect']].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print(" wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print(" collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_generator, verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }
    print(save_dict)

    # Drop the heavy objects and reset the Keras session so repeated calls
    # do not accumulate memory.
    del datagen
    del train_generator, validation_generator, test_generator
    del model, history

    keras.backend.clear_session()
    gc.collect()

    return save_dict
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    """Run 10-fold stratified cross-validation on ``experiment_<no>`` and export a CSV.

    Each fold trains a fresh model, predicts and evaluates the held-out
    split, and contributes one column of metrics to the result table, which
    is written to ``<log>/<data_mode>_<model_mode>_<no>/`` where ``<log>``
    is ``rlog_with_es`` or ``rlog`` under the current working directory.

    Args:
        data_mode: ``'auged'`` halves ``set_epochs`` and adds samples from
            ``auged_experiment_<no>`` to each fold's training split; any
            other value uses the natural data only.
        model_mode: ``'mymodel'`` or ``'tlearn'`` (model built by
            ``ModelHandler``).
        no: Experiment number used in directory names.
        set_epochs: Number of training epochs (halved in ``'auged'`` mode).
        do_es: When truthy, train with an ``EarlyStopping`` callback on
            ``val_loss`` (patience 5).
    """
    cwd = os.getcwd()
    data_dir = os.path.join(cwd, "experiment_{}".format(no))
    use_aug = data_mode == 'auged'

    # Labels stay as class indices here: they feed StratifiedKFold.split
    # and are one-hot encoded per fold further down.
    total_data, total_label = inputDataCreator(data_dir, 224, normalize=True)
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if use_aug:
        base_dir, data_dir_name = os.path.split(data_dir)
        data_dir_name = "auged_" + data_dir_name
        auged_dir = os.path.join(base_dir, data_dir_name)
        set_epochs = int(set_epochs / 2)
        total_auged_data, total_auged_label = inputDataCreator(
            auged_dir, 224, normalize=True, one_hot=True)
        print("\n total auged_data : ", total_auged_data.shape)

    handler = ModelHandler(total_data.shape[1], total_data.shape[3])
    splitter = StratifiedKFold(n_splits=10)

    for fold, (traval_idx, test_idx) in enumerate(
            splitter.split(total_data, total_label)):
        print("\nK-Fold Cross-Validation k:{} ==========".format(fold))
        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]
        print("-----*-----*-----")
        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]

        # Class index -> one-hot row (two classes).
        one_hot_rows = np.identity(2)
        traval_label = one_hot_rows[traval_label.astype(np.int8)]
        test_label = one_hot_rows[test_label.astype(np.int8)]

        # Hold out a third of the non-test samples for validation.
        split = dataSplit(traval_data,
                          traval_label,
                          train_rate=2 / 3,
                          validation_rate=1 / 3,
                          test_rate=0)
        train_data, train_label, validation_data, validation_label, _, _ = split

        if use_aug:
            print("\nadd auged data to train_data...")
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]
            auged_split = dataSplit(auged_traval_data,
                                    auged_traval_label,
                                    train_rate=2 / 3,
                                    validation_rate=1 / 3,
                                    test_rate=0)
            auged_train_data, auged_train_label, _, _, _, _ = auged_split
            print(" append auged data: ", auged_train_data.shape)
            print("\n concatnate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print(" Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        if model_mode == 'mymodel':
            model = handler.buildMyModel()
        elif model_mode == 'tlearn':
            model = handler.buildTlearnModel(base='mnv1')
        model.summary()

        # Keras takes either a list of callbacks or None.
        callbacks = (
            [EarlyStopping(monitor='val_loss', patience=5, verbose=1,
                           mode='auto')]
            if do_es else None
        )

        batch_size = 10
        print("set epochs: ", set_epochs)

        print("\ntraining sequence start .....")
        started_at = time.time()
        history = model.fit(train_data,
                            train_label,
                            batch_size,
                            epochs=set_epochs,
                            validation_data=(validation_data,
                                             validation_label),
                            callbacks=callbacks,
                            verbose=1)
        elapsed_time = time.time() - started_at

        metrics = history.history
        accs = metrics['accuracy']
        losses = metrics['loss']
        val_accs = metrics['val_accuracy']
        val_losses = metrics['val_loss']

        log_dir = os.path.join(cwd, "rlog_with_es" if do_es else "rlog")
        os.makedirs(log_dir, exist_ok=True)
        child_log_dir = os.path.join(
            log_dir, "{}_{}_{}".format(data_mode, model_mode, no))
        os.makedirs(child_log_dir, exist_ok=True)
        # Exporting the model and the pickled history is disabled here.

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=1)

        # One-hot column 0 -> 'cat', column 1 -> 'dog'; rows that match
        # neither are skipped.
        label_name_list = []
        for row in test_label:
            if row[0] == 1:
                label_name_list.append('cat')
            elif row[1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        is_right = df_pred['collect']
        confuse = df_pred.index[~is_right].tolist()
        collect = df_pred.index[is_right].tolist()

        print(df_pred)
        print("\nwrong recognized indeices are ", confuse)
        print(" wrong recognized amount is ", len(confuse))
        print("\ncollect recognized indeices are ", collect)
        print(" collect recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")
        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=1)
        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {
            'last_loss': losses[-1],
            'last_acc': accs[-1],
            'last_val_loss': val_losses[-1],
            'last_val_acc': val_accs[-1],
            'n_confuse': len(confuse),
            'eval_loss': eval_res[0],
            'eval_acc': eval_res[1],
            'elapsed_time': elapsed_time,
        }
        print(save_dict)

        # One column per fold, one row per metric.
        if fold == 0:
            df_result = pd.DataFrame(save_dict.values(),
                                     index=save_dict.keys())
        else:
            df_result[fold] = pd.Series(save_dict)
        print(df_result)

        # Release everything fold-specific before the next fold starts.
        del traval_data, traval_label
        if use_aug:
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label
        del train_data, train_label, validation_data, validation_label
        del test_data, test_label
        del model, history, metrics, pred
        del df_pred, is_right, label_name_list, confuse, collect
        del eval_res, accs, losses, val_accs, val_losses

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

    csv_file = os.path.join(child_log_dir,
                            "{}_{}_result.csv".format(data_mode, model_mode))
    df_result.to_csv(csv_file)
    print("\nexport {} as CSV.".format(csv_file))

    # Drop the remaining large objects before the final collection.
    del cwd, data_dir, total_data, total_label, save_dict, df_result
    if use_aug:
        del total_auged_data, total_auged_label
        del base_dir, data_dir_name, auged_dir

    gc.collect()
def main():
    """Train on the ``medium_721`` dataset, print a confusion matrix and save the model.

    The dataset root is ``<cwd>/../datasets/medium_721``. The trained model
    is saved to ``<cwd>/../outputs/models/mymodel_auged.h5`` when augmented
    training data is used and to ``mymodel.h5`` otherwise.

    Relies on the module-level names ``target_size``, ``batch_size``,
    ``input_size``, ``channel`` and ``set_epochs``.
    """
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)

    data_dir = os.path.join(sub_prj, "datasets")
    data_src = os.path.join(data_dir, "medium_721")
    print("\ndata source: ", data_src)

    use_da_data = True
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")
    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)
    train_generator = data_gen.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')
    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')
    # The test generator is not shuffled so predictions and labels line up.
    test_generator = data_gen.flow_from_directory(
        test_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False,
        class_mode='categorical')

    data_checker, label_checker = next(train_generator)
    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    # The returned history is not used in this script.
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        verbose=1)
    print("elapsed time (for train): {} [sec]".format(time.time() - start))

    print("\nevaluate sequence...")
    # Only whole batches are evaluated; a trailing partial batch is dropped.
    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    pred = model.predict_generator(test_generator,
                                   steps=test_steps,
                                   verbose=3)

    # Collect the one-hot labels of the predicted batches; stack once
    # instead of re-stacking after every batch.
    label_batches = [next(test_generator)[1] for _ in range(test_steps)]
    test_label = np.vstack(label_batches)

    idx_label = np.argmax(test_label, axis=-1)  # one-hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => best class

    # Pin both classes so the matrix is always 2x2, even when one class is
    # absent from the labels and predictions; ravel() below needs 4 cells.
    cm = confusion_matrix(idx_label, idx_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    print(" | T | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # save model -----
    save_location = os.path.join(sub_prj, "outputs", "models")
    # Fixed: the directory was never created, so saving failed on a fresh
    # checkout.
    os.makedirs(save_location, exist_ok=True)
    if use_da_data:
        save_file = os.path.join(save_location, "mymodel_auged.h5")
    else:
        save_file = os.path.join(save_location, "mymodel.h5")
    model.save(save_file)
    print("\nmodel has saved in", save_file)
def main(N, LEARN_PATH, DATA_MODE, BUILD_MODEL, EPOCHS=60):
    """Train and evaluate one model on the ``sample_<N>`` dataset.

    Args:
        N: Sample number; selects ``<LEARN_PATH>/sample_<N>``.
        LEARN_PATH: Directory that contains the ``sample_<N>`` directories.
        DATA_MODE: Not read by this function.
            NOTE(review): the train directory is chosen by a local flag
            that is fixed to False - confirm whether DATA_MODE should
            drive it.
        BUILD_MODEL: ``'mymodel'`` or ``'tlearn'`` (model built by
            ``ModelHandler``).
        EPOCHS: Maximum number of training epochs.

    Returns:
        dict: Last-epoch training/validation loss and accuracy, the number
        of misclassified test samples, test loss/accuracy and the elapsed
        training time in seconds.
    """
    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    use_da_data = False
    train_dir = os.path.join(
        sample_dir, "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(
        train_dir, 224, normalize=True, one_hot=True)
    validation_data, validation_label = inputDataCreator(
        validation_dir, 224, normalize=True, one_hot=True)
    test_data, test_label = inputDataCreator(
        test_dir, 224, normalize=True, one_hot=True)

    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    batch_size = 10
    print("set epochs: ", EPOCHS)

    handler = ModelHandler(train_data.shape[1], train_data.shape[3])
    if BUILD_MODEL == 'mymodel':
        model = handler.buildMyModel()
    elif BUILD_MODEL == 'tlearn':
        model = handler.buildTlearnModel(base='mnv1')
    model.summary()

    # early stopping
    stopper = keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[stopper],
                        verbose=2)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(time.time() - start))

    metrics = history.history
    accs = metrics['accuracy']
    losses = metrics['loss']
    val_accs = metrics['val_accuracy']
    val_losses = metrics['val_loss']
    # Exporting logs, the model and the pickled history is disabled here.

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=10, verbose=1)

    # One-hot column 0 -> 'cat', column 1 -> 'dog'; rows that match neither
    # are skipped.
    label_name_list = []
    for row in test_label:
        if row[0] == 1:
            label_name_list.append('cat')
        elif row[1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    is_right = df_pred['collect']
    confuse = df_pred.index[~is_right].tolist()
    collect = df_pred.index[is_right].tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print(" wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print(" collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=1)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }
    print(save_dict)

    # Try releasing the objects that look heavy before resetting Keras.
    del model, history, metrics

    keras.backend.clear_session()
    gc.collect()

    return save_dict
def main(LEARN_PATH, INPUT_SIZE, CHANNEL, BATCH_SIZE, EPOCHS, use_da_data=False):
    """Train the cat/dog classifier, evaluate it on the test split and log results.

    The function loads the train / validation / test splits found under
    ``LEARN_PATH``, fits the model built by ``ModelHandler.buildMyModel()``
    with early stopping on ``val_loss``, then evaluates and predicts on the
    test split.

    Files written (relative to the parent directory of the current working
    directory, in a sub-directory named after today's date, ``YYYYMMDD``):

    * ``outputs/models/<date>/model.h5`` -- the trained model.
    * ``outputs/logs/<date>/result.csv`` -- final metrics and elapsed time.

    Args:
        LEARN_PATH: Dataset root containing ``train`` (or ``train_with_aug``),
            ``validation`` and ``test`` sub-directories.
        INPUT_SIZE: Size passed to ``inputDataCreator`` and ``ModelHandler``
            (presumably the side length of the square input images -- confirm
            against those helpers).
        CHANNEL: Channel count passed to ``ModelHandler``.
        BATCH_SIZE: Batch size used for fit, evaluate and predict.
        EPOCHS: Maximum number of training epochs (early stopping may end
            training sooner).
        use_da_data: When true, train on ``train_with_aug`` instead of
            ``train``. Defaults to ``False``, the previously hard-coded value.

    Returns:
        None. Results are printed and written to disk.
    """
    data_src = LEARN_PATH
    print("\ndata source: ", data_src)

    train_dir = os.path.join(data_src,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               INPUT_SIZE,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         INPUT_SIZE,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             INPUT_SIZE,
                                             normalize=True,
                                             one_hot=True)

    print("train data shape (in batch): ", train_data.shape)
    print("train label shape (in batch): ", train_label.shape)

    # build model ----------
    mh = ModelHandler(INPUT_SIZE, CHANNEL)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[-1])

    # evaluate ----------
    print("\nevaluate sequence...")
    # Use the caller's batch size, consistent with fit() and predict().
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=BATCH_SIZE,
                              verbose=2)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # logging and detail outputs -----
    # Output directories live next to (not inside) the current working
    # directory, one sub-directory per calendar day.
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    date_tag = "{0:%Y%m%d}".format(datetime.datetime.now())

    child_log_dir = os.path.join(sub_prj, "outputs", "logs", date_tag)
    child_model_log_dir = os.path.join(sub_prj, "outputs", "models", date_tag)
    # makedirs creates the intermediate "outputs/logs" and "outputs/models"
    # directories as well.
    os.makedirs(child_log_dir, exist_ok=True)
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)

    # predict -> confusion matrix ----------
    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=BATCH_SIZE, verbose=2)

    # one-hot labels => class indices (column 0 is 'cat', column 1 is 'dog')
    class_names = ['cat', 'dog']
    idx_label = np.argmax(test_label, axis=-1)
    # per-class probabilities => index of the most probable class
    idx_pred = np.argmax(pred, axis=-1)
    label_name_list = [class_names[idx] for idx in idx_label]

    df_pred = pd.DataFrame(pred, columns=class_names)
    # idxmax must run while only the probability columns are present.
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = label_name_list
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    is_right = df_pred['collect']
    confuse = df_pred.index[~is_right].tolist()
    collect = df_pred.index[is_right].tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print(" wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print(" collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    # save history
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }
    print("save result dict:", save_dict)

    # One row per metric, metric name as index (same CSV layout as before).
    df_result = pd.DataFrame(list(save_dict.values()),
                             index=list(save_dict.keys()))
    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport result in ", csv_file)

    # confusion matrix -----
    #           Predict
    #            0  |  1
    #        --+----+-----
    #        0 | TN | FP
    # label -------+-----
    #        1 | FN | TP
    print("\nconfusion matrix")
    # Pin the label set so the matrix is always 2x2, even when the test
    # split (or the predictions) contain only one of the two classes;
    # otherwise the 4-way unpacking below fails.
    cm = confusion_matrix(idx_label, idx_pred, labels=[0, 1])

    # Calculate Precision and Recall
    tn, fp, fn, tp = cm.ravel()

    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # Precision (a.k.a. PPV, positive predictive value):
    #   of the samples predicted positive, the fraction that really are.
    #   sklearn.metrics.precision_score(idx_label, idx_pred) computes the
    #   same quantity.
    # Reported as NaN when nothing was predicted positive.
    predicted_pos = tp + fp
    precision = tp / predicted_pos if predicted_pos else float("nan")
    print("Precision of the model is {}".format(precision))

    # Recall (a.k.a. sensitivity, hit rate, TPR / true positive rate):
    #   of the samples that are actually positive, the fraction predicted
    #   correctly. sklearn.metrics.recall_score(idx_label, idx_pred)
    #   computes the same quantity.
    #   Roughly speaking it is the accuracy restricted to class 1 (how the
    #   negative side fares is ignored); conversely TN / (TN + FP) is the
    #   accuracy restricted to class 0.
    # Reported as NaN when the test split has no positive samples.
    actual_pos = tp + fn
    recall = tp / actual_pos if actual_pos else float("nan")
    print("Recall of the model is {}".format(recall))