def main():
    """Train, evaluate, and run inference for a CRNN on the Synth90k dataset.

    A pool of processes runs ``augmentation_worker_proc`` once per epoch while
    a thread runs ``training_worker_proc``.  Validation and test batches are
    generated once with ``NpzFileBatchGeneratorFromNpyFiles`` and consumed
    through ``NpzFileBatchLoader``.

    The manager server process started here is always shut down on exit, even
    when training or inference raises.
    """
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'synth90k_crnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180302T155710'

    initial_epoch = 0

    # When outputs are not sparse, CRNN model's output shape = (samples, 32, num_classes) and dataset's output shape = (samples, 23, num_classes).
    is_sparse_output = True  # Fixed.

    # NOTE [info] >> Places with the same parameters.
    #   class Synth90kLabelConverter in ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
    #   class Synth90kPreprocessor.
    # Max length of words in lexicon = 23.
    image_height, image_width, image_channel = 32, 128, 1

    # Label: 0~9 + a~z.
    label_characters = '0123456789abcdefghijklmnopqrstuvwxyz'
    EOS = '<EOS>'  # All strings will end with the End-Of-String token.
    extended_label_list = list(label_characters) + [EOS]
    label_char2int = {c: i for i, c in enumerate(extended_label_list)}

    num_labels = len(extended_label_list)
    # Extended labels + blank label.
    # NOTE [info] >> The largest value (num_classes - 1) is reserved for the blank label.
    num_classes = num_labels + 1
    label_eos_token = label_char2int[EOS]

    batch_size = 256  # Number of samples per gradient update.
    num_epochs = 100  # Number of times to iterate over training data.
    shuffle = True

    # If imgaug augmenter is used, data are augmented in background augmentation processes. (faster)
    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    num_loaded_files_at_a_time = 5
    num_processes = 5

    train_batch_dir_path_prefix = './train_batch_dir'
    num_train_batch_dirs = 10
    val_batch_dir_path_prefix = './val_batch_dir'
    num_val_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    num_test_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares multiprocessing.

    # set_start_method() should not be used more than once in the program.
    #mp.set_start_method('spawn')

    BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
    BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
    BaseManager.register('NpzFileBatchGeneratorFromNpyFiles', NpzFileBatchGeneratorFromNpyFiles)
    manager = BaseManager()
    manager.start()

    # mp.Manager().Lock() cannot be used here (TypeError: can't pickle _thread.lock objects).
    lock = mp.Lock()

    try:
        #--------------------
        # Prepares directories.

        output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
        checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
        inference_dir_path = os.path.join(output_dir_path, 'inference')
        train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
        val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

        swl_util.make_dir(checkpoint_dir_path)
        swl_util.make_dir(inference_dir_path)
        swl_util.make_dir(train_summary_dir_path)
        swl_util.make_dir(val_summary_dir_path)

        #--------------------
        # Prepares data.

        # NOTE [info] >> Generate synth90k dataset using swl.language_processing.synth90k_dataset.save_synth90k_dataset_to_npy_files().
        #   Refer to ${SWL_PYTHON_HOME}/test/language_processing/synth90k_dataset_test.py.
        synth90k_base_dir_path = './synth90k_npy'
        (train_input_filepaths, train_output_filepaths,
         val_input_filepaths, val_output_filepaths,
         test_input_filepaths, test_output_filepaths) = load_data(synth90k_base_dir_path)

        #--------------------
        # Creates models, sessions, and graphs.

        # Creates graphs.
        if does_need_training:
            train_graph = tf.Graph()
            eval_graph = tf.Graph()
        infer_graph = tf.Graph()

        if does_need_training:
            with train_graph.as_default():
                # Creates a model.
                modelForTraining = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
                modelForTraining.create_training_model()

                # Creates a trainer.
                nnTrainer = SimpleCrnnTrainer(modelForTraining, initial_epoch)

                # Creates a saver.
                #   Saves a model every 2 hours and maximum 5 latest models are saved.
                train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

                initializer = tf.global_variables_initializer()

            with eval_graph.as_default():
                # Creates a model.
                modelForEvaluation = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
                modelForEvaluation.create_evaluation_model()

                # Creates an evaluator.
                nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

                # Creates a saver.
                eval_saver = tf.train.Saver()

        with infer_graph.as_default():
            # Creates a model.
            modelForInference = create_synth90k_crnn(image_height, image_width, image_channel, num_classes, label_eos_token, is_sparse_output)
            modelForInference.create_inference_model()

            # Creates an inferrer.
            nnInferrer = NeuralNetInferrer(modelForInference)

            # Creates a saver.
            infer_saver = tf.train.Saver()

        # Creates sessions.
        if does_need_training:
            train_session = tf.Session(graph=train_graph, config=sess_config)
            eval_session = tf.Session(graph=eval_graph, config=sess_config)
        infer_session = tf.Session(graph=infer_graph, config=sess_config)

        # Initializes.
        if does_need_training:
            train_session.run(initializer)

        #%%------------------------------------------------------------------
        # Trains and evaluates.

        if does_need_training:
            valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, num_val_batch_dirs)

            # Polls until the manager hands out a directory.
            print('\tWaiting for a validation batch directory...')
            while True:
                val_dir_path = valDirMgr.requestDirectory()
                if val_dir_path is not None:
                    break
                time.sleep(0.1)
            print('\tGot a validation batch directory: {}.'.format(val_dir_path))

            valFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(val_input_filepaths, val_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
            num_saved_examples = valFileBatchGenerator.saveBatches(val_dir_path)  # Generates and saves batches.
            print('\t#saved examples = {}.'.format(num_saved_examples))

            valDirMgr.returnDirectory(val_dir_path)

            #--------------------
            # Multiprocessing (augmentation) + multithreading (training).

            # NOTE(review): the training thread gets this in-process manager while the
            #   augmentation workers get the separate proxy 'trainDirMgr_mp' below.
            #   Confirm that the two instances are meant to coordinate (e.g. via the filesystem).
            trainDirMgr = TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)

            training_worker_thread = threading.Thread(
                target=training_worker_proc,
                args=(train_session, nnTrainer, trainDirMgr, valDirMgr, batch_info_csv_filename, num_epochs, does_resume_training, train_saver, output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path, False, is_sparse_output))
            training_worker_thread.start()

            trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(train_batch_dir_path_prefix, num_train_batch_dirs)

            #timeout = 10
            timeout = None
            with mp.Pool(processes=num_processes, initializer=initialize_lock, initargs=(lock,)) as pool:
                # One augmentation task per epoch.
                data_augmentation_results = pool.map_async(
                    partial(augmentation_worker_proc, augmenter, is_output_augmented, batch_info_csv_filename, trainDirMgr_mp, train_input_filepaths, train_output_filepaths, num_loaded_files_at_a_time, batch_size, shuffle, False),
                    range(num_epochs))

                # Must be awaited inside the 'with' block: leaving it terminates the pool.
                data_augmentation_results.get(timeout)

            training_worker_thread.join()

            #--------------------
            valFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    swl_tf_util.evaluate_neural_net_by_file_batch_loader(sess, nnEvaluator, valFileBatchLoader, valDirMgr, eval_saver, checkpoint_dir_path, False, False)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))

        #%%------------------------------------------------------------------
        # Infers.

        testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, num_test_batch_dirs)

        #--------------------
        print('\tWaiting for a test batch directory...')
        while True:
            test_dir_path = testDirMgr.requestDirectory()
            if test_dir_path is not None:
                break
            time.sleep(0.1)
        print('\tGot a test batch directory: {}.'.format(test_dir_path))

        testFileBatchGenerator = NpzFileBatchGeneratorFromNpyFiles(test_input_filepaths, test_output_filepaths, num_loaded_files_at_a_time, batch_size, False, False, batch_info_csv_filename=batch_info_csv_filename)
        num_saved_examples = testFileBatchGenerator.saveBatches(test_dir_path)  # Generates and saves batches.
        print('\t#saved examples = {}.'.format(num_saved_examples))

        testDirMgr.returnDirectory(test_dir_path)

        #--------------------
        testFileBatchLoader = NpzFileBatchLoader(batch_info_csv_filename, data_processing_functor=Synth90kPreprocessor(is_sparse_output))

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(sess, nnInferrer, testFileBatchLoader, testDirMgr, infer_saver, checkpoint_dir_path, False)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

        #--------------------
        if inferences is not None:
            # Ground truths are not held in memory by this file-based pipeline, so they are
            # reloaded from the test label files.
            # NOTE(review): assumes every file in test_output_filepaths is a .npy array of
            #   labels stored in the same order and layout as the inference results — confirm.
            test_labels = np.concatenate([np.load(filepath) for filepath in test_output_filepaths], axis=0)

            if num_classes >= 2:
                inferences = np.argmax(inferences, -1)
                groundtruths = np.argmax(test_labels, -1)
            else:
                inferences = np.around(inferences)
                groundtruths = test_labels
            correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
            print('\tAccurary = {} / {} = {}'.format(correct_estimation_count, groundtruths.size, correct_estimation_count / groundtruths.size))
        else:
            print('[SWL] Warning: Invalid inference results.')

        #--------------------
        # Closes sessions.

        if does_need_training:
            train_session.close()
            del train_session
            eval_session.close()
            del eval_session
        infer_session.close()
        del infer_session
    finally:
        # Stops the manager server process started above.
        manager.shutdown()
def main():
    """Train, evaluate, and run inference for a seq2seq encoder-decoder on the reverse-function task.

    Separate graphs and sessions are built for training, evaluation, and
    inference; evaluation and inference restore weights through their own
    savers from ``checkpoint_dir_path``.
    """
    #np.random.seed(7)

    #--------------------
    # Parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'reverse_function_seq2seq'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180222T144236'

    max_gradient_norm = 5
    initial_epoch = 0

    # FIXME [modify] >> In order to use a time-major dataset, trainer, evaluator, and inferrer have to be modified.
    is_time_major = False
    is_dynamic = False

    batch_size = 4  # Number of samples per gradient update.
    num_epochs = 70  # Number of times to iterate over training data.
    shuffle = True

    augmenter = None
    is_output_augmented = False

    sess_config = tf.ConfigProto()
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepare directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepare data.

    # The previous data section loaded plant-mask images that nothing below consumed and
    # left every name the model/training code needs undefined.  The reverse-function
    # dataset is built here instead.
    # NOTE(review): the vocabulary and the meaning of the tuple positions returned by
    #   generate_dataset() (encoder input, decoder input, decoder output) are assumptions
    #   — confirm against ReverseFunctionDataset.
    characters = list('abcd')
    dataset = ReverseFunctionDataset(characters)

    (train_encoder_input_seqs, train_decoder_input_seqs, train_decoder_output_seqs,
     val_encoder_input_seqs, val_decoder_input_seqs, val_decoder_output_seqs) = dataset.generate_dataset(is_time_major)

    # NOTE(review): assumes encoder input, decoder input, and decoder output share one shape — confirm.
    if is_dynamic:
        # Dynamic RNNs use variable-length dataset.
        # TODO [improve] >> Training & validation datasets are still fixed-length (static).
        seq_shape = (None, None, dataset.vocab_size)
    elif is_time_major:
        # (time-steps, samples, features).
        seq_shape = (dataset.max_token_len, None, dataset.vocab_size)
    else:
        # (samples, time-steps, features).
        seq_shape = (None, dataset.max_token_len, dataset.vocab_size)
    encoder_input_shape = seq_shape
    decoder_input_shape = seq_shape
    decoder_output_shape = seq_shape

    #--------------------
    # Create models, sessions, and graphs.

    # Create graphs.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Create a model.
            modelForTraining = create_seq2seq_encoder_decoder(encoder_input_shape, decoder_input_shape, decoder_output_shape, dataset, is_time_major)
            modelForTraining.create_training_model()

            # Create a trainer.
            #nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch, augmenter, is_output_augmented)
            nnTrainer = SimpleGradientClippingNeuralNetTrainer(modelForTraining, max_gradient_norm, initial_epoch, augmenter, is_output_augmented)

            # Create a saver.
            #   Save a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default():
            # Create a model.
            modelForEvaluation = create_seq2seq_encoder_decoder(encoder_input_shape, decoder_input_shape, decoder_output_shape, dataset, is_time_major)
            modelForEvaluation.create_evaluation_model()

            # Create an evaluator.
            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

            # Create a saver.
            eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Create a model.
        modelForInference = create_seq2seq_encoder_decoder(encoder_input_shape, decoder_input_shape, decoder_output_shape, dataset, is_time_major)
        modelForInference.create_inference_model()

        # Create an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Create a saver.
        infer_saver = tf.train.Saver()

    # Create sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initialize.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Train and evaluate.

    if does_need_training:
        start_time = time.time()
        with train_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.train_neural_net_with_decoder_input(
                    sess, nnTrainer,
                    train_encoder_input_seqs, train_decoder_input_seqs, train_decoder_output_seqs,
                    val_encoder_input_seqs, val_decoder_input_seqs, val_decoder_output_seqs,
                    batch_size, num_epochs, shuffle, does_resume_training, train_saver,
                    output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path)
        print('\tTotal training time = {}'.format(time.time() - start_time))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.evaluate_neural_net_with_decoder_input(
                    sess, nnEvaluator,
                    val_encoder_input_seqs, val_decoder_input_seqs, val_decoder_output_seqs,
                    batch_size, eval_saver, checkpoint_dir_path, is_time_major)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infer.

    test_strs = ['abc', 'cba', 'dcb', 'abcd', 'dcba', 'cdacbd', 'bcdaabccdb']
    # String data -> numeric data.
    test_data = dataset.to_numeric(test_strs)

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            inferences = swl_tf_util.infer_by_neural_net(sess, nnInferrer, test_data, batch_size, infer_saver, checkpoint_dir_path, is_time_major)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        # Numeric data -> string data.
        inferred_strs = dataset.to_string(inferences, has_start_token=False)
        print('\tTest strings = {}, inferred strings = {}'.format(test_strs, inferred_strs))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Close sessions.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session
def main():
    """Train, evaluate, and run inference for an encoder-decoder on the reverse-function task.

    Separate graphs and sessions are built for training, evaluation, and
    inference; evaluation and inference restore weights through their own
    savers from ``checkpoint_dir_path``.
    """
    #np.random.seed(7)

    #--------------------
    # Parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'reverse_function_encdec'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180116T212902'

    initial_epoch = 0

    characters = list('abcd')

    # FIXME [modify] >> In order to use a time-major dataset, trainer, evaluator, and inferrer have to be modified.
    is_time_major = False
    is_dynamic = False
    is_attentive = True  # Uses attention mechanism.
    is_bidirectional = True  # Uses a bidirectional model.
    # Both branches currently hold the same values; they are kept separate so that the
    # attentive and non-attentive models can be tuned independently.
    if is_attentive:
        batch_size = 4  # Number of samples per gradient update.
        num_epochs = 150  # Number of times to iterate over training data.
    else:
        batch_size = 4  # Number of samples per gradient update.
        num_epochs = 150  # Number of times to iterate over training data.
    shuffle = True

    augmenter = None
    is_output_augmented = False

    sess_config = tf.ConfigProto()
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepare directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepare data.

    dataset = ReverseFunctionDataset(characters)

    # Only the 1st, 2nd, 4th, and 5th returned items are used here.
    (train_encoder_input_seqs, train_decoder_output_seqs, _,
     val_encoder_input_seqs, val_decoder_output_seqs, _) = dataset.generate_dataset(is_time_major)
    #train_encoder_input_seqs, _, train_decoder_output_seqs, val_encoder_input_seqs, _, val_decoder_output_seqs = dataset.generate_dataset(is_time_major)

    if is_dynamic:
        # Dynamic RNNs use variable-length dataset.
        # TODO [improve] >> Training & validation datasets are still fixed-length (static).
        input_shape = (None, None, dataset.vocab_size)
        output_shape = (None, None, dataset.vocab_size)
    else:
        # Static RNNs use fixed-length dataset.
        if is_time_major:
            # (time-steps, samples, features).
            input_shape = (dataset.max_token_len, None, dataset.vocab_size)
            output_shape = (dataset.max_token_len, None, dataset.vocab_size)
        else:
            # (samples, time-steps, features).
            input_shape = (None, dataset.max_token_len, dataset.vocab_size)
            output_shape = (None, dataset.max_token_len, dataset.vocab_size)

    #--------------------
    # Create models, sessions, and graphs.

    # Create graphs.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Create a model.
            modelForTraining = create_encoder_decoder(input_shape, output_shape, is_attentive, is_dynamic, is_bidirectional, is_time_major)
            modelForTraining.create_training_model()

            # Create a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch, augmenter, is_output_augmented)

            # Create a saver.
            #   Save a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default():
            # Create a model.
            modelForEvaluation = create_encoder_decoder(input_shape, output_shape, is_attentive, is_dynamic, is_bidirectional, is_time_major)
            modelForEvaluation.create_evaluation_model()

            # Create an evaluator.
            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

            # Create a saver.
            eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Create a model.
        modelForInference = create_encoder_decoder(input_shape, output_shape, is_attentive, is_dynamic, is_bidirectional, is_time_major)
        modelForInference.create_inference_model()

        # Create an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Create a saver.
        infer_saver = tf.train.Saver()

    # Create sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initialize.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Train and evaluate.

    if does_need_training:
        start_time = time.time()
        with train_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.train_neural_net(
                    sess, nnTrainer,
                    train_encoder_input_seqs, train_decoder_output_seqs,
                    val_encoder_input_seqs, val_decoder_output_seqs,
                    batch_size, num_epochs, shuffle, does_resume_training, train_saver,
                    output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path)
        print('\tTotal training time = {}'.format(time.time() - start_time))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.evaluate_neural_net(sess, nnEvaluator, val_encoder_input_seqs, val_decoder_output_seqs, batch_size, eval_saver, checkpoint_dir_path, is_time_major)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infer.

    test_strs = ['abc', 'cba', 'dcb', 'abcd', 'dcba', 'cdacbd', 'bcdaabccdb']
    # String data -> numeric data.
    test_data = dataset.to_numeric(test_strs)

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            # The network is fed the numeric encoding, not the raw strings.
            inferences = swl_tf_util.infer_by_neural_net(sess, nnInferrer, test_data, batch_size, infer_saver, checkpoint_dir_path, is_time_major)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        # Numeric data -> string data.
        inferred_strs = dataset.to_string(inferences, has_start_token=True)
        print('\tTest strings = {}, inferred strings = {}'.format(test_strs, inferred_strs))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Close sessions.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session
def main():
    """Train, evaluate, and run inference for an RNN with CTC on a single utterance.

    One WAV file is converted to normalized MFCC features and its transcript
    to integer labels (0 for space, 1.. for 'a'..).  The same example is
    reused as the validation set.  After inference the decoded label sequence
    is printed next to the original transcript.
    """
    #np.random.seed(7)

    #--------------------
    # Parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'timit_rnn_ctc'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20181129T122700'

    initial_epoch = 0

    is_sparse_label = True
    is_time_major = False
    label_eos_token = -1

    num_features = 13
    # Account the 0th indice + space + blank label = 28 characters.
    num_classes = ord('z') - ord('a') + 1 + 1 + 1

    batch_size = 1  # Number of samples per gradient update.
    num_epochs = 200  # Number of times to iterate over training data.
    shuffle = True

    # Session configuration.
    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepare directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepare data.

    # Constants.
    SPACE_TOKEN = '<space>'
    SPACE_INDEX = 0
    FIRST_INDEX = ord('a') - 1  # 0 is reserved to space.

    # Load the data.
    audio_filepath = '../../../data/machine_learning/LDC93S1.wav'
    target_filepath = '../../../data/machine_learning/LDC93S1.txt'

    fs, audio = wav.read(audio_filepath)

    inputs = mfcc(audio, samplerate=fs)
    # Transform into a 3D array by adding a leading axis of size 1.
    train_inputs = np.asarray(inputs[np.newaxis, :])
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)

    # Read targets.
    with open(target_filepath, 'r') as fd:
        # Only the last line is necessary.
        line = fd.readlines()[-1]

    # Drop the first two space-separated fields, lowercase, and remove periods.
    original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')
    # Double every space so that splitting on a single space leaves an empty string
    # between consecutive words; each empty string marks a word boundary below.
    targets = original.replace(' ', '  ')
    targets = targets.split(' ')

    # Replace word-boundary markers with the space token and split words into characters.
    targets = np.hstack([SPACE_TOKEN if '' == x else list(x) for x in targets])

    # Transform char into index.
    targets = np.asarray([SPACE_INDEX if SPACE_TOKEN == x else ord(x) - FIRST_INDEX for x in targets])

    if is_sparse_label:
        # Create sparse representation to feed the placeholder.
        # NOTE [info] {important} >> A tuple (indices, values, dense_shape) for a sparse tensor, not tf.SparseTensor.
        train_outputs = swl_ml_util.sequences_to_sparse([targets])
    else:
        train_outputs = targets.reshape((-1,) + targets.shape)

    # We don't have a validation dataset.
    val_inputs, val_outputs = train_inputs, train_outputs

    #--------------------
    # Create models, sessions, and graphs.

    # Create graphs.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Create a model.
            modelForTraining = create_rnn(num_features, num_classes, label_eos_token, is_time_major, is_sparse_label)
            modelForTraining.create_training_model()

            # Create a trainer.
            nnTrainer = SimpleRnnTrainer(modelForTraining, initial_epoch)

            # Create a saver.
            #   Save a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default():
            # Create a model.
            modelForEvaluation = create_rnn(num_features, num_classes, label_eos_token, is_time_major, is_sparse_label)
            modelForEvaluation.create_evaluation_model()

            # Create an evaluator.
            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

            # Create a saver.
            eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Create a model.
        modelForInference = create_rnn(num_features, num_classes, label_eos_token, is_time_major, is_sparse_label)
        modelForInference.create_inference_model()

        # Create an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Create a saver.
        infer_saver = tf.train.Saver()

    # Create sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initialize.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Train and evaluate.

    if does_need_training:
        start_time = time.time()
        with train_session.as_default() as sess:
            with sess.graph.as_default():
                if is_sparse_label:
                    # Supports lists of dense and sparse labels.
                    swl_tf_util.train_neural_net_by_batch_list(
                        sess, nnTrainer,
                        [train_inputs], [train_outputs], [val_inputs], [val_outputs],
                        num_epochs, shuffle, does_resume_training, train_saver,
                        output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path,
                        is_time_major, is_sparse_label)
                else:
                    # Supports a dense label only.
                    swl_tf_util.train_neural_net(
                        sess, nnTrainer,
                        train_inputs, train_outputs, val_inputs, val_outputs,
                        batch_size, num_epochs, shuffle, does_resume_training, train_saver,
                        output_dir_path, checkpoint_dir_path, train_summary_dir_path, val_summary_dir_path)
        print('\tTotal training time = {}'.format(time.time() - start_time))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                swl_tf_util.evaluate_neural_net(sess, nnEvaluator, val_inputs, val_outputs, batch_size, eval_saver, checkpoint_dir_path, is_time_major, is_sparse_label)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infer.

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            # type(inferences) = tf.SparseTensorValue.
            inferences = swl_tf_util.infer_by_neural_net(sess, nnInferrer, val_inputs, batch_size, infer_saver, checkpoint_dir_path, is_time_major)

    # Shift label indices back to character codes.
    str_decoded = ''.join([chr(x) for x in np.asarray(inferences.values) + FIRST_INDEX])
    # Replaces blank label to none.
    str_decoded = str_decoded.replace(chr(ord('z') + 1), '')
    # Replaces space label to space.
    str_decoded = str_decoded.replace(chr(ord('a') - 1), ' ')

    print('Original:\n%s' % original)
    print('Decoded:\n%s' % str_decoded)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    #--------------------
    # Close sessions.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session
def main():
    """Train, evaluate, and run inference with the MNIST CNN using batch generators.

    All settings are the local constants at the top of the function. With
    ``use_multiprocessing`` set, training batches are augmented by a pool of
    worker processes while training runs in a separate thread; with only
    ``use_file_batch_loader`` set, every training batch is written to a batch
    directory before training starts; otherwise in-memory batch generators are
    used.

    Checkpoints and summaries are written below a timestamped output directory
    in the current working directory. The test split doubles as the validation
    set. TensorFlow sessions and the multiprocessing manager are released even
    if one of the stages raises.
    """
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20190127T001424'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    is_output_augmented = False

    use_multiprocessing = True  # Batch generators & loaders are used in case of multiprocessing.
    use_file_batch_loader = True  # Is not related to multiprocessing.
    num_processes = 5

    train_batch_dir_path_prefix = './train_batch_dir'
    val_batch_dir_path_prefix = './val_batch_dir'
    val_num_batch_dirs = 1
    test_batch_dir_path_prefix = './test_batch_dir'
    test_num_batch_dirs = 1
    batch_info_csv_filename = 'batch_info.csv'

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    def save_batches_to_directory(dir_mgr, images, labels, is_shuffled, kind):
        """Generate batches from (images, labels) and save them into a directory of dir_mgr.

        Polls dir_mgr until a working directory is handed out, and always
        returns the directory to the manager, even if saving fails.
        """
        while True:
            dir_path = dir_mgr.requestDirectory()
            if dir_path is not None:
                break
            time.sleep(0.1)
        print('\tGot a {} batch directory: {}.'.format(kind, dir_path))

        try:
            batch_generator = NpzFileBatchGenerator(
                images, labels, batch_size, is_shuffled, False,
                batch_info_csv_filename=batch_info_csv_filename)
            batch_generator.saveBatches(dir_path)  # Generates and saves batches.
        finally:
            dir_mgr.returnDirectory(dir_path)

    #--------------------
    # Prepares multiprocessing.

    manager = None
    if use_multiprocessing:
        # set_start_method() should not be used more than once in the program.
        #mp.set_start_method('spawn')

        BaseManager.register('WorkingDirectoryManager', WorkingDirectoryManager)
        BaseManager.register('TwoStepWorkingDirectoryManager', TwoStepWorkingDirectoryManager)
        BaseManager.register('NpzFileBatchGenerator', NpzFileBatchGenerator)
        #BaseManager.register('NpzFileBatchLoader', NpzFileBatchLoader)
        manager = BaseManager()
        manager.start()

        lock = mp.Lock()
        #lock = mp.Manager().Lock()  # TypeError: can't pickle _thread.lock objects.

    sessions = []  # Every session opened below, in creation order.
    try:
        #--------------------
        # Prepares directories.

        output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
        checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
        inference_dir_path = os.path.join(output_dir_path, 'inference')
        train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
        val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

        for dir_path in (checkpoint_dir_path, inference_dir_path,
                         train_summary_dir_path, val_summary_dir_path):
            swl_util.make_dir(dir_path)

        #--------------------
        # Prepares data.

        train_images, train_labels, test_images, test_labels = load_data(input_shape[1:])

        #--------------------
        # Creates models, sessions, and graphs.

        # Each stage gets its own graph, model, and saver.
        if does_need_training:
            train_graph = tf.Graph()
            with train_graph.as_default():
                modelForTraining = create_mnist_cnn(input_shape, output_shape)
                modelForTraining.create_training_model()

                nnTrainer = SimpleNeuralNetTrainer(modelForTraining, initial_epoch)

                # Saves a model every 2 hours and maximum 5 latest models are saved.
                train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

                initializer = tf.global_variables_initializer()

        eval_graph = tf.Graph()
        with eval_graph.as_default():
            modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
            modelForEvaluation.create_evaluation_model()

            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)
            eval_saver = tf.train.Saver()

        infer_graph = tf.Graph()
        with infer_graph.as_default():
            modelForInference = create_mnist_cnn(input_shape, output_shape)
            modelForInference.create_inference_model()

            nnInferrer = NeuralNetInferrer(modelForInference)
            infer_saver = tf.train.Saver()

        # Creates sessions.
        if does_need_training:
            train_session = tf.Session(graph=train_graph, config=sess_config)
            sessions.append(train_session)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
        sessions.append(eval_session)
        infer_session = tf.Session(graph=infer_graph, config=sess_config)
        sessions.append(infer_session)

        # Initializes.
        if does_need_training:
            train_session.run(initializer)

        #%%------------------------------------------------------------------
        # Trains and evaluates.

        if does_need_training:
            if use_file_batch_loader or use_multiprocessing:
                # Validation batches are generated once from the test split.
                valDirMgr = WorkingDirectoryManager(val_batch_dir_path_prefix, val_num_batch_dirs)
                save_batches_to_directory(valDirMgr, test_images, test_labels, False, 'validation')

            if use_multiprocessing:
                train_num_batch_dirs = 5
                trainDirMgr_mp = manager.TwoStepWorkingDirectoryManager(
                    train_batch_dir_path_prefix, train_num_batch_dirs)
                valDirMgr_mp = manager.WorkingDirectoryManager(
                    val_batch_dir_path_prefix, val_num_batch_dirs)

                # Multiprocessing (augmentation) + multithreading (training).
                # NOTE [info] >> Training cannot run in the process pool as well:
                #   a TensorFlow session and saver cannot be passed to a worker
                #   procedure through multiprocessing.pool.apply_async().
                training_worker_thread = threading.Thread(
                    target=training_worker_proc,
                    args=(train_session, nnTrainer, trainDirMgr_mp, valDirMgr_mp,
                          batch_info_csv_filename, num_epochs, does_resume_training,
                          train_saver, output_dir_path, checkpoint_dir_path,
                          train_summary_dir_path, val_summary_dir_path, False, False))
                training_worker_thread.start()

                #timeout = 10
                timeout = None
                with mp.Pool(processes=num_processes, initializer=initialize_lock,
                             initargs=(lock,)) as pool:
                    # One augmentation task per epoch.
                    data_augmentation_results = pool.map_async(
                        partial(augmentation_worker_proc, augmenter, is_output_augmented,
                                batch_info_csv_filename, trainDirMgr_mp, train_images,
                                train_labels, batch_size, shuffle, False),
                        list(range(num_epochs)))

                    # Must be awaited inside the 'with' block: leaving it terminates the pool.
                    data_augmentation_results.get(timeout)

                training_worker_thread.join()
            elif use_file_batch_loader:
                train_num_batch_dirs = num_epochs
                trainDirMgr = WorkingDirectoryManager(
                    train_batch_dir_path_prefix, train_num_batch_dirs)

                # TODO [improve] >> Not-so-good implementation.
                #   Usually training is performed for much more epochs, so too many
                #   batches have to be generated before training.
                for _ in range(train_num_batch_dirs):
                    save_batches_to_directory(trainDirMgr, train_images, train_labels, shuffle, 'train')

                #--------------------
                trainFileBatchLoader = NpzFileBatchLoader(
                    batch_info_csv_filename, data_processing_functor=None)
                valFileBatchLoader = NpzFileBatchLoader(
                    batch_info_csv_filename, data_processing_functor=None)

                start_time = time.time()
                with train_session.as_default() as sess:
                    with sess.graph.as_default():
                        swl_tf_util.train_neural_net_by_file_batch_loader(
                            sess, nnTrainer, trainFileBatchLoader, valFileBatchLoader,
                            trainDirMgr, valDirMgr, num_epochs, does_resume_training,
                            train_saver, output_dir_path, checkpoint_dir_path,
                            train_summary_dir_path, val_summary_dir_path, False, False)
                print('\tTotal training time = {}'.format(time.time() - start_time))
            else:
                trainBatchGenerator = SimpleBatchGenerator(
                    train_images, train_labels, batch_size, shuffle, False,
                    augmenter, is_output_augmented)
                valBatchGenerator = SimpleBatchGenerator(
                    test_images, test_labels, batch_size, False, False)

                start_time = time.time()
                with train_session.as_default() as sess:
                    with sess.graph.as_default():
                        swl_tf_util.train_neural_net_by_batch_generator(
                            sess, nnTrainer, trainBatchGenerator, valBatchGenerator,
                            num_epochs, does_resume_training, train_saver,
                            output_dir_path, checkpoint_dir_path,
                            train_summary_dir_path, val_summary_dir_path, False, False)
                print('\tTotal training time = {}'.format(time.time() - start_time))

            #--------------------
            if use_file_batch_loader:
                valFileBatchLoader = NpzFileBatchLoader(
                    batch_info_csv_filename, data_processing_functor=None)

                start_time = time.time()
                with eval_session.as_default() as sess:
                    with sess.graph.as_default():
                        swl_tf_util.evaluate_neural_net_by_file_batch_loader(
                            sess, nnEvaluator, valFileBatchLoader, valDirMgr,
                            eval_saver, checkpoint_dir_path, False, False)
                print('\tTotal evaluation time = {}'.format(time.time() - start_time))
            else:
                valBatchGenerator = SimpleBatchGenerator(
                    test_images, test_labels, batch_size, False, False)

                start_time = time.time()
                with eval_session.as_default() as sess:
                    with sess.graph.as_default():
                        swl_tf_util.evaluate_neural_net_by_batch_generator(
                            sess, nnEvaluator, valBatchGenerator, eval_saver,
                            checkpoint_dir_path, False, False)
                print('\tTotal evaluation time = {}'.format(time.time() - start_time))

        #%%------------------------------------------------------------------
        # Infers.

        if use_file_batch_loader:
            testDirMgr = WorkingDirectoryManager(test_batch_dir_path_prefix, test_num_batch_dirs)

            #--------------------
            save_batches_to_directory(testDirMgr, test_images, test_labels, False, 'test')

            #--------------------
            testFileBatchLoader = NpzFileBatchLoader(
                batch_info_csv_filename, data_processing_functor=None)

            start_time = time.time()
            with infer_session.as_default() as sess:
                with sess.graph.as_default():
                    inferences = swl_tf_util.infer_by_neural_net_and_file_batch_loader(
                        sess, nnInferrer, testFileBatchLoader, testDirMgr,
                        infer_saver, checkpoint_dir_path, False)
            print('\tTotal inference time = {}'.format(time.time() - start_time))
        else:
            testBatchGenerator = SimpleBatchGenerator(
                test_images, test_labels, batch_size, False, False)

            start_time = time.time()
            with infer_session.as_default() as sess:
                with sess.graph.as_default():
                    inferences = swl_tf_util.infer_by_neural_net_and_batch_generator(
                        sess, nnInferrer, testBatchGenerator, infer_saver,
                        checkpoint_dir_path, False)
            print('\tTotal inference time = {}'.format(time.time() - start_time))

        if inferences is not None:
            # Per-batch results are stacked into one array before scoring.
            inferences = np.vstack(inferences)
            if num_classes >= 2:
                inferences = np.argmax(inferences, -1)
                groundtruths = np.argmax(test_labels, -1)
            else:
                inferences = np.around(inferences)
                groundtruths = test_labels
            correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
            print('\tAccurary = {} / {} = {}'.format(
                correct_estimation_count, groundtruths.size,
                correct_estimation_count / groundtruths.size))
        else:
            print('[SWL] Warning: Invalid inference results.')
    finally:
        #--------------------
        # Closes sessions and stops the manager's server process.

        for session in sessions:
            session.close()
        if manager is not None:
            manager.shutdown()
def main():
    """Train, evaluate, infer, and visualize the MNIST CNN on explicitly chosen devices.

    All settings are the local constants at the top of the function. The
    training and evaluation graphs are built on ``/device:GPU:1`` and the
    inference graph on ``/device:CPU:0``. After inference, activation,
    deconvolution, and partial-occlusion visualizations are produced through
    ``swl_ml_util`` and occlusion maps are saved as PNG files in the output
    directory.

    Checkpoints, summaries, and images are written below a timestamped output
    directory in the current working directory. The test split doubles as the
    validation set. TensorFlow sessions are closed even if a stage raises.
    """
    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False
    does_need_visualization = True

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180302T155710'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = ImgaugAugmenter()
    #augmenter = create_imgaug_augmenter()  # If imgaug augmenter is used, data are augmented in background augmentation processes. (faster)
    is_output_augmented = False

    sess_config = tf.ConfigProto()
    #sess_config = tf.ConfigProto(device_count={'GPU': 2, 'CPU': 1})  # os.environ['CUDA_VISIBLE_DEVICES'] = 0,1.
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    #sess_config.operation_timeout_in_ms = 50000
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    train_device_name = '/device:GPU:1'
    eval_device_name = '/device:GPU:1'
    # NOTE [info] >> Cannot assign a device for operation save/SaveV2:
    #   Could not satisfy explicit device specification '/device:GPU:1' because
    #   no supported kernel for GPU devices is available.
    #   Errors occur in tf_cnnvis library when a GPU is assigned.
    #infer_device_name = '/device:GPU:1'
    infer_device_name = '/device:CPU:0'

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    for dir_path in (checkpoint_dir_path, inference_dir_path,
                     train_summary_dir_path, val_summary_dir_path):
        swl_util.make_dir(dir_path)

    #--------------------
    # Prepares data.

    train_images, train_labels, test_images, test_labels = load_data(input_shape[1:])

    #--------------------
    # Creates models, sessions, and graphs.

    # Each stage gets its own graph, model, and saver.
    if does_need_training:
        train_graph = tf.Graph()
        with train_graph.as_default():
            with tf.device(train_device_name):
                #K.set_learning_phase(1)  # Sets the learning phase to 'train'. (Required)

                modelForTraining = create_mnist_cnn(input_shape, output_shape)
                modelForTraining.create_training_model()

                nnTrainer = SimpleNeuralNetTrainer(
                    modelForTraining, initial_epoch, augmenter, is_output_augmented)

                # Saves a model every 2 hours and maximum 5 latest models are saved.
                train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

                initializer = tf.global_variables_initializer()

    eval_graph = tf.Graph()
    with eval_graph.as_default():
        with tf.device(eval_device_name):
            #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

            modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
            modelForEvaluation.create_evaluation_model()

            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)
            eval_saver = tf.train.Saver()

    infer_graph = tf.Graph()
    with infer_graph.as_default():
        with tf.device(infer_device_name):
            #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

            modelForInference = create_mnist_cnn(input_shape, output_shape)
            modelForInference.create_inference_model()

            nnInferrer = NeuralNetInferrer(modelForInference)
            infer_saver = tf.train.Saver()

    sessions = []  # Every session opened below, in creation order.
    try:
        # Creates sessions.
        if does_need_training:
            train_session = tf.Session(graph=train_graph, config=sess_config)
            sessions.append(train_session)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
        sessions.append(eval_session)
        infer_session = tf.Session(graph=infer_graph, config=sess_config)
        sessions.append(infer_session)

        # Initializes.
        if does_need_training:
            train_session.run(initializer)

        #%%------------------------------------------------------------------
        # Trains and evaluates.

        if does_need_training:
            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    #K.set_session(sess)
                    #K.set_learning_phase(1)  # Sets the learning phase to 'train'.
                    swl_tf_util.train_neural_net(
                        sess, nnTrainer, train_images, train_labels, test_images,
                        test_labels, batch_size, num_epochs, shuffle,
                        does_resume_training, train_saver, output_dir_path,
                        checkpoint_dir_path, train_summary_dir_path,
                        val_summary_dir_path)
            print('\tTotal training time = {}'.format(time.time() - start_time))

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    #K.set_session(sess)
                    #K.set_learning_phase(0)  # Sets the learning phase to 'test'.
                    swl_tf_util.evaluate_neural_net(
                        sess, nnEvaluator, test_images, test_labels, batch_size,
                        eval_saver, checkpoint_dir_path)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))

        #%%------------------------------------------------------------------
        # Infers.

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                #K.set_session(sess)
                #K.set_learning_phase(0)  # Sets the learning phase to 'test'.
                inferences = swl_tf_util.infer_by_neural_net(
                    sess, nnInferrer, test_images, batch_size, infer_saver,
                    checkpoint_dir_path)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

        if inferences is not None:
            if num_classes >= 2:
                inferences = np.argmax(inferences, -1)
                groundtruths = np.argmax(test_labels, -1)
            else:
                inferences = np.around(inferences)
                groundtruths = test_labels
            correct_estimation_count = np.count_nonzero(np.equal(inferences, groundtruths))
            print('\tAccurary = {} / {} = {}'.format(
                correct_estimation_count, groundtruths.size,
                correct_estimation_count / groundtruths.size))
        else:
            print('[SWL] Warning: Invalid inference results.')

        #%%------------------------------------------------------------------
        # Visualizes.

        if does_need_visualization:
            with infer_session.as_default() as sess:
                with sess.graph.as_default():
                    #K.set_session(sess)
                    #K.set_learning_phase(0)  # Sets the learning phase to 'test'.

                    #--------------------
                    idx = 0
                    #vis_images = train_images[idx:(idx+1)]  # Recommends using a single image.
                    vis_images = test_images[idx:(idx + 1)]  # Recommends using a single image.
                    feed_dict = modelForInference.get_feed_dict(vis_images, is_training=False)
                    input_tensor = None
                    #input_tensor = modelForInference.input_tensor

                    print('[SWL] Info: Start visualizing activation...')
                    start_time = time.time()
                    is_succeeded = swl_ml_util.visualize_activation(
                        sess, input_tensor, feed_dict, output_dir_path)
                    print('\tVisualization time = {}, succeeded? = {}'.format(
                        time.time() - start_time, 'yes' if is_succeeded else 'no'))
                    print('[SWL] Info: End visualizing activation...')

                    print('[SWL] Info: Start visualizing by deconvolution...')
                    start_time = time.time()
                    is_succeeded = swl_ml_util.visualize_by_deconvolution(
                        sess, input_tensor, feed_dict, output_dir_path)
                    print('\tVisualization time = {}, succeeded? = {}'.format(
                        time.time() - start_time, 'yes' if is_succeeded else 'no'))
                    print('[SWL] Info: End visualizing by deconvolution...')

                    #--------------------
                    #vis_images = train_images[0:10]
                    #vis_labels = train_labels[0:10]
                    vis_images = test_images[0:100]
                    vis_labels = test_labels[0:100]

                    print('[SWL] Info: Start visualizing by partial occlusion...')
                    start_time = time.time()
                    grid_counts = (28, 28)  # (grid count in height, grid count in width).
                    grid_size = (4, 4)  # (grid height, grid width).
                    occlusion_color = 0  # Black.
                    occluded_probabilities = swl_ml_util.visualize_by_partial_occlusion(
                        sess, nnInferrer, vis_images, vis_labels, grid_counts,
                        grid_size, occlusion_color, num_classes, batch_size,
                        infer_saver, checkpoint_dir_path)
                    print('\tVisualization time = {}'.format(time.time() - start_time))
                    print('[SWL] Info: End visualizing by partial occlusion...')

                    if occluded_probabilities is not None:
                        # Imported lazily: only needed when there is something to save.
                        import matplotlib.pyplot as plt

                        for idx, prob in enumerate(occluded_probabilities):
                            # Images are scaled by 255 and the occlusion map is inverted before saving.
                            plt.imsave(
                                os.path.join(output_dir_path, 'occluded_prob_{}.png'.format(idx)),
                                np.around((1 - prob.reshape(prob.shape[:2])) * 255),
                                cmap='gray')
                            vis_image = vis_images[idx]
                            plt.imsave(
                                os.path.join(output_dir_path, 'vis_{}.png'.format(idx)),
                                np.around(vis_image.reshape(vis_image.shape[:2]) * 255),
                                cmap='gray')
    finally:
        #--------------------
        # Closes sessions.

        for session in sessions:
            session.close()
def main():
    """Train, evaluate, and run inference with the CRNN on multi-digit MNIST sequences.

    All settings are the local constants at the top of the function. With
    ``is_sparse_label`` set, labels are converted to class indices and the data
    is split into batch lists up front; inference results are then expanded
    from (indices, values, dense shape) triples into dense arrays filled with
    the blank label before they are compared with the ground truth.

    Checkpoints and summaries are written below a timestamped output directory
    in the current working directory. The test split doubles as the validation
    set. TensorFlow sessions are closed even if a stage raises.
    """
    #np.random.seed(7)

    #--------------------
    # Parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_crnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20181211T172200'

    #max_gradient_norm = 5
    initial_epoch = 0

    is_time_major = False  # Fixed.
    is_sparse_label = True
    # Fixed: batch lists are used exactly when labels are sparse.
    use_batch_list = bool(is_sparse_label)

    image_height, image_width, image_channel = 28, 28, 1
    # For prepare_single_character_dataset().
    #slice_width, slice_stride = 14, 7
    #min_time_steps = math.ceil((image_width - slice_width) / slice_stride) + 1
    #max_time_steps = min_time_steps  # max_time_steps >= min_time_steps.
    # For prepare_multiple_character_dataset().
    min_digit_count, max_digit_count = 3, 5
    max_time_steps = max_digit_count + 2  # max_time_steps >= max_digit_count.

    num_labels = 10
    # NOTE [info] >> The largest value (num_classes - 1) is reserved for the blank label.
    # 0~9 + space label + blank label.
    num_classes = num_labels + 1 + 1
    space_label = num_classes - 2
    blank_label = num_classes - 1
    label_eos_token = -1

    batch_size = 128  # Number of samples per gradient update.
    # Number of times to iterate over training data.
    num_epochs = 500 if is_sparse_label else 200
    shuffle = True

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    def create_model():
        """Build a CRNN with the parameters above in the current default graph."""
        return create_crnn(image_height, image_width, image_channel, num_classes,
                           max_time_steps, is_time_major, is_sparse_label,
                           label_eos_token)

    def sparse_to_dense_array(sparse, default_value):
        """Expand an (indices, values, dense shape) triple into a dense NumPy array.

        Done in NumPy rather than with tf.sparse_to_dense so that no new op is
        added to the inference graph for every batch.
        """
        indices = np.asarray(sparse[0])
        values = np.asarray(sparse[1])
        dense_shape = tuple(int(dim) for dim in sparse[2])

        dense = np.full(dense_shape, default_value, dtype=values.dtype)
        if indices.size > 0:
            # One index array per dimension selects exactly the listed cells.
            dense[tuple(indices.T)] = values
        return dense

    #--------------------
    # Prepare directories.

    output_dir_path = os.path.join('.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    for dir_path in (checkpoint_dir_path, inference_dir_path,
                     train_summary_dir_path, val_summary_dir_path):
        swl_util.make_dir(dir_path)

    #--------------------
    # Prepare data.

    #train_images, train_labels, test_images, test_labels = prepare_single_character_dataset((image_height, image_width, image_channel), num_classes, max_time_steps, slice_width, slice_stride, is_sparse_label)
    # Images: (samples, time-steps, height, width, channels), labels: (samples, num_digits, one-hot encoding).
    train_images, train_labels, test_images, test_labels = prepare_multiple_character_dataset(
        (image_height, image_width, image_channel), num_classes, min_digit_count,
        max_digit_count, max_time_steps, space_label, is_sparse_label)

    # Visualize dataset.
    #visualize_dataset(train_images, train_labels, 5)
    #visualize_dataset(test_images, test_labels, 5)

    if is_sparse_label:
        # One-hot labels are reduced to class indices.
        train_labels = np.argmax(train_labels, axis=-1)
        test_labels = np.argmax(test_labels, axis=-1)

    if use_batch_list:
        train_images_list, train_labels_list = swl_ml_util.generate_batch_list(
            train_images, train_labels, batch_size, shuffle=shuffle,
            is_time_major=is_time_major, is_sparse_label=is_sparse_label,
            eos_token=blank_label)
        test_images_list, test_labels_list = swl_ml_util.generate_batch_list(
            test_images, test_labels, batch_size, shuffle=False,
            is_time_major=is_time_major, is_sparse_label=is_sparse_label,
            eos_token=blank_label)

    print('Train images = {}, train labels = {}, test images = {}, test labels = {}'
          .format(train_images.shape, train_labels.shape, test_images.shape,
                  test_labels.shape))
    if use_batch_list:
        print('Train images list = {}, train labels list = {}, test images list = {}, test labels list = {}'
              .format(len(train_images_list), len(train_labels_list),
                      len(test_images_list), len(test_labels_list)))

    #--------------------
    # Create models, sessions, and graphs.

    # Each stage gets its own graph, model, and saver.
    if does_need_training:
        train_graph = tf.Graph()
        with train_graph.as_default():
            modelForTraining = create_model()
            modelForTraining.create_training_model()

            # NOTE [info] >> SimpleCrnnGradientClippingTrainer(modelForTraining,
            #   max_gradient_norm, initial_epoch) is an alternative trainer.
            nnTrainer = SimpleCrnnTrainer(modelForTraining, initial_epoch)

            # Save a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

    eval_graph = tf.Graph()
    with eval_graph.as_default():
        modelForEvaluation = create_model()
        modelForEvaluation.create_evaluation_model()

        nnEvaluator = NeuralNetEvaluator(modelForEvaluation)
        eval_saver = tf.train.Saver()

    infer_graph = tf.Graph()
    with infer_graph.as_default():
        modelForInference = create_model()
        modelForInference.create_inference_model()

        nnInferrer = NeuralNetInferrer(modelForInference)
        infer_saver = tf.train.Saver()

    sessions = []  # Every session opened below, in creation order.
    try:
        # Create sessions.
        if does_need_training:
            train_session = tf.Session(graph=train_graph, config=sess_config)
            sessions.append(train_session)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
        sessions.append(eval_session)
        infer_session = tf.Session(graph=infer_graph, config=sess_config)
        sessions.append(infer_session)

        # Initialize.
        if does_need_training:
            train_session.run(initializer)

        #%%------------------------------------------------------------------
        # Train and evaluate.

        if does_need_training:
            start_time = time.time()
            with train_session.as_default() as sess:
                with sess.graph.as_default():
                    if use_batch_list:
                        # Supports lists of dense or sparse labels.
                        swl_tf_util.train_neural_net_by_batch_list(
                            sess, nnTrainer, train_images_list, train_labels_list,
                            test_images_list, test_labels_list, num_epochs, shuffle,
                            does_resume_training, train_saver, output_dir_path,
                            checkpoint_dir_path, train_summary_dir_path,
                            val_summary_dir_path, is_time_major, is_sparse_label)
                    else:
                        # Supports a dense label only.
                        swl_tf_util.train_neural_net(
                            sess, nnTrainer, train_images, train_labels, test_images,
                            test_labels, batch_size, num_epochs, shuffle,
                            does_resume_training, train_saver, output_dir_path,
                            checkpoint_dir_path, train_summary_dir_path,
                            val_summary_dir_path)
            print('\tTotal training time = {}'.format(time.time() - start_time))

            start_time = time.time()
            with eval_session.as_default() as sess:
                with sess.graph.as_default():
                    if use_batch_list:
                        # Supports lists of dense or sparse labels.
                        swl_tf_util.evaluate_neural_net_by_batch_list(
                            sess, nnEvaluator, test_images_list, test_labels_list,
                            eval_saver, checkpoint_dir_path, is_time_major,
                            is_sparse_label)
                    else:
                        # Supports dense or sparse labels.
                        swl_tf_util.evaluate_neural_net(
                            sess, nnEvaluator, test_images, test_labels, batch_size,
                            eval_saver, checkpoint_dir_path, is_time_major,
                            is_sparse_label)
            print('\tTotal evaluation time = {}'.format(time.time() - start_time))

        #%%------------------------------------------------------------------
        # Infer.

        start_time = time.time()
        with infer_session.as_default() as sess:
            with sess.graph.as_default():
                if is_sparse_label:
                    ground_truths = test_labels
                    if use_batch_list:
                        # Supports lists of dense or sparse labels.
                        inferences_list = swl_tf_util.infer_from_batch_list_by_neural_net(
                            sess, nnInferrer, test_images_list, infer_saver,
                            checkpoint_dir_path, is_time_major)
                        dense_batches = [
                            sparse_to_dense_array(inf, blank_label)
                            for inf in inferences_list]
                        # Concatenated once instead of once per batch.
                        inferences = (np.concatenate(dense_batches, axis=0)
                                      if dense_batches else None)
                    else:
                        # Supports dense or sparse labels.
                        inferences = swl_tf_util.infer_by_neural_net(
                            sess, nnInferrer, test_images, batch_size, infer_saver,
                            checkpoint_dir_path, is_time_major, is_sparse_label)
                        if inferences is not None:
                            inferences = sparse_to_dense_array(inferences, blank_label)
                else:
                    ground_truths = np.argmax(test_labels, axis=-1)
                    if use_batch_list:
                        # Supports lists of dense or sparse labels.
                        inferences_list = swl_tf_util.infer_from_batch_list_by_neural_net(
                            sess, nnInferrer, test_images_list, infer_saver,
                            checkpoint_dir_path, is_time_major)
                        batches = list(inferences_list)
                        inferences = np.concatenate(batches, axis=0) if batches else None
                    else:
                        # Supports dense or sparse labels.
                        inferences = swl_tf_util.infer_by_neural_net(
                            sess, nnInferrer, test_images, batch_size, infer_saver,
                            checkpoint_dir_path, is_time_major, is_sparse_label)
                    if inferences is not None:
                        inferences = np.argmax(inferences, axis=-1)
        print('\tTotal inference time = {}'.format(time.time() - start_time))

        if inferences is not None:
            # TODO [check] >> Is it correct?
            correct_estimation_count = np.count_nonzero(np.equal(inferences, ground_truths))
            print('\tAccurary = {} / {} = {}'.format(
                correct_estimation_count, ground_truths.size,
                correct_estimation_count / ground_truths.size))

            # Shows at most the first 10 (inference, ground truth) pairs.
            for inferred, ground_truth in zip(inferences[:10], ground_truths[:10]):
                print(inferred, ground_truth)
        else:
            print('[SWL] Warning: Invalid inference results.')
    finally:
        #--------------------
        # Close sessions.

        for session in sessions:
            session.close()
def main():
    """Run the MNIST MLP example: optional training/evaluation, then inference.

    Training, evaluation, and inference each get their own TensorFlow graph
    and session. All outputs go under a time-stamped directory
    ('./mnist_mlp_<timestamp>') in the current working directory.
    """
    #np.random.seed(7)

    #--------------------
    # Run settings.

    does_need_training = True
    does_resume_training = False

    # The output directory is named '<prefix>_<suffix>'.
    output_dir_prefix = 'mnist_mlp'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180302T155710'

    initial_epoch = 0

    num_classes = 10
    input_shape = (None, 28, 28, 1)  # 784 = 28 * 28.
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True

    augmenter = None
    is_output_augmented = False

    #sess_config = tf.ConfigProto()
    # os.environ['CUDA_VISIBLE_DEVICES'] = 0,-1,2.
    sess_config = tf.ConfigProto(device_count={'GPU': 2, 'CPU': 1})
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    #sess_config.operation_timeout_in_ms = 50000
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    # All three phases are pinned to the same device.
    train_device_name = '/device:GPU:1'
    eval_device_name = '/device:GPU:1'
    infer_device_name = '/device:GPU:1'

    #--------------------
    # Output directories.

    output_dir_name = '{}_{}'.format(output_dir_prefix, output_dir_suffix)
    output_dir_path = os.path.join('.', output_dir_name)
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    for dir_path in (checkpoint_dir_path, inference_dir_path,
                     train_summary_dir_path, val_summary_dir_path):
        swl_util.make_dir(dir_path)

    #--------------------
    # Data.

    train_images, train_labels, test_images, test_labels = load_data(
        input_shape[1:])

    #--------------------
    # Graphs, models, and sessions.

    # One graph per phase; the training and evaluation graphs are only
    # needed when training is requested.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default(), tf.device(train_device_name):
            #K.set_learning_phase(1)  # Sets the learning phase to 'train'. (Required)

            # Model and trainer for the training phase.
            train_model = create_mnist_mlp(input_shape, output_shape)
            train_model.create_training_model()
            nnTrainer = SimpleNeuralNetTrainer(train_model, initial_epoch,
                                               augmenter, is_output_augmented)

            # Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5,
                                         keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default(), tf.device(eval_device_name):
            #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

            # Model and evaluator for the evaluation phase.
            eval_model = create_mnist_mlp(input_shape, output_shape)
            eval_model.create_evaluation_model()
            nnEvaluator = NeuralNetEvaluator(eval_model)

            eval_saver = tf.train.Saver()

    with infer_graph.as_default(), tf.device(infer_device_name):
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'. (Required)

        # Model and inferrer for the inference phase.
        infer_model = create_mnist_mlp(input_shape, output_shape)
        infer_model.create_inference_model()
        nnInferrer = NeuralNetInferrer(infer_model)

        infer_saver = tf.train.Saver()

    # Sessions, one per graph.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Variable initialization is only run for the training graph.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Training and evaluation.

    if does_need_training:
        started_at = time.time()
        with train_session.as_default() as sess, sess.graph.as_default():
            #K.set_session(sess)
            #K.set_learning_phase(1)  # Sets the learning phase to 'train'.
            swl_tf_util.train_neural_net(
                sess, nnTrainer, train_images, train_labels, test_images,
                test_labels, batch_size, num_epochs, shuffle,
                does_resume_training, train_saver, output_dir_path,
                checkpoint_dir_path, train_summary_dir_path,
                val_summary_dir_path)
        print('\tTotal training time = {}'.format(time.time() - started_at))

        started_at = time.time()
        with eval_session.as_default() as sess, sess.graph.as_default():
            #K.set_session(sess)
            #K.set_learning_phase(0)  # Sets the learning phase to 'test'.
            swl_tf_util.evaluate_neural_net(sess, nnEvaluator, test_images,
                                            test_labels, batch_size,
                                            eval_saver, checkpoint_dir_path)
        print('\tTotal evaluation time = {}'.format(time.time() - started_at))

    #%%------------------------------------------------------------------
    # Inference.

    started_at = time.time()
    with infer_session.as_default() as sess, sess.graph.as_default():
        #K.set_session(sess)
        #K.set_learning_phase(0)  # Sets the learning phase to 'test'.
        inferences = swl_tf_util.infer_by_neural_net(
            sess, nnInferrer, test_images, batch_size, infer_saver,
            checkpoint_dir_path)
    print('\tTotal inference time = {}'.format(time.time() - started_at))

    if inferences is None:
        print('[SWL] Warning: Invalid inference results.')
    else:
        if num_classes >= 2:
            # Multi-class: compare the arg-max class indices.
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            # Single output: round the scores and compare directly.
            inferences = np.around(inferences)
            groundtruths = test_labels
        matches = np.equal(inferences, groundtruths)
        correct_estimation_count = np.count_nonzero(matches)
        total_count = groundtruths.size
        print('\tAccurary = {} / {} = {}'.format(
            correct_estimation_count, total_count,
            correct_estimation_count / total_count))

    #--------------------
    # Session cleanup.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session
def main():
    """Train, evaluate, and run an FC-DenseNet for plant foreground extraction.

    A single TensorFlow session on the default graph is shared by the
    training, evaluation, and inference phases (Keras backend is pointed at
    that session). After patch-level inference and accuracy reporting, the
    function infers full-size images from patches and visualizes activations.
    Outputs go under './plant_foreground_extraction_<timestamp>'.
    """
    #np.random.seed(7)

    #--------------------
    # Parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'plant_foreground_extraction'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20180117T135317'

    initial_epoch = 0

    image_suffix = ''
    image_extension = 'png'
    label_suffix = '_foreground'
    label_extension = 'png'
    patch_height, patch_width = 224, 224

    num_classes = 2
    input_shape = (None, patch_height, patch_width, 3)
    output_shape = (None, patch_height, patch_width, num_classes)

    batch_size = 6  # Number of samples per gradient update.
    num_epochs = 50  # Number of times to iterate over training data.
    shuffle = True

    augmenter = None
    is_output_augmented = False

    sess_config = tf.ConfigProto()
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepare directories.

    output_dir_path = os.path.join(
        '.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')
    # Fixed: was os.path.joint(), which does not exist (AttributeError).
    npy_dir_path = os.path.join(output_dir_path, 'npy')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)
    swl_util.make_dir(npy_dir_path)

    #--------------------
    # Prepare data.

    if 'posix' == os.name:
        data_home_dir_path = '/home/sangwook/my_dataset'
    else:
        data_home_dir_path = 'D:/dataset'

    image_dir_path = data_home_dir_path + '/phenotyping/RDA/all_plants'
    label_dir_path = data_home_dir_path + '/phenotyping/RDA/all_plants_foreground'

    (train_image_patches, test_image_patches, train_label_patches,
     test_label_patches, image_list, label_list) = RdaPlantDataset.load_data(
         image_dir_path, image_suffix, image_extension, label_dir_path,
         label_suffix, label_extension, num_classes, patch_height,
         patch_width)

    #--------------------
    # Create models, sessions, and graphs.

    # Create graphs.
    # Alternative (disabled): one tf.Graph() per phase.
    #if does_need_training:
    #    train_graph = tf.Graph()
    #    eval_graph = tf.Graph()
    #infer_graph = tf.Graph()
    default_graph = tf.get_default_graph()

    # Create sessions.
    # Alternative (disabled): one tf.Session per phase graph.
    #if does_need_training:
    #    train_session = tf.Session(graph=train_graph, config=sess_config)
    #    eval_session = tf.Session(graph=eval_graph, config=sess_config)
    #infer_session = tf.Session(graph=infer_graph, config=sess_config)
    #default_session = tf.get_default_session()
    default_session = tf.Session(graph=default_graph, config=sess_config)
    # Every phase aliases the one shared session.
    if does_need_training:
        train_session = default_session
        eval_session = default_session
    infer_session = default_session

    if does_need_training:
        #with train_graph.as_default():
        with train_session.as_default() as sess:
            with sess.graph.as_default():
                K.set_session(sess)
                K.set_learning_phase(1)  # Set the learning phase to 'train'. (Required)

                # Create a model.
                modelForTraining = FcDenseNetUsingKeras(
                    input_shape, output_shape)
                modelForTraining.create_training_model()

                # Create a trainer.
                nnTrainer = SimpleNeuralNetTrainer(modelForTraining,
                                                   initial_epoch, augmenter,
                                                   is_output_augmented)

                # Create a saver.
                #   Save a model every 2 hours and maximum 5 latest models are saved.
                train_saver = tf.train.Saver(max_to_keep=5,
                                             keep_checkpoint_every_n_hours=2)

                initializer = tf.global_variables_initializer()

        #with eval_graph.as_default():
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                K.set_session(sess)
                K.set_learning_phase(0)  # Set the learning phase to 'test'. (Required)

                # Create a model.
                # The training model is reused instead of building a
                # separate evaluation model:
                #modelForEvaluation = FcDenseNetUsingKeras(input_shape, output_shape)
                #modelForEvaluation.create_evaluation_model()
                modelForEvaluation = modelForTraining

                # Create an evaluator.
                nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

                # Create a saver.
                #eval_saver = tf.train.Saver()
                eval_saver = None  # Do not load a model.
    else:
        modelForTraining = None

    #with infer_graph.as_default():
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            K.set_session(sess)
            K.set_learning_phase(0)  # Set the learning phase to 'test'. (Required)

            # Create a model.
            if does_need_training:
                # Reuse the model that lives in the shared session.
                modelForInference = modelForTraining
            else:
                # NOTE(review): a fresh inference model is only built when
                # nothing was trained in this run -- confirm this matches the
                # intended layout of the original script.
                modelForInference = FcDenseNetUsingKeras(
                    input_shape, output_shape)
                modelForInference.create_inference_model()

            # Create an inferrer.
            nnInferrer = NeuralNetInferrer(modelForInference)

            # Create a saver.
            if does_need_training:
                infer_saver = None  # Do not load a model.
            else:
                infer_saver = tf.train.Saver()

    # Initialize.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Train and evaluate.

    if does_need_training:
        start_time = time.time()
        with train_session.as_default() as sess:
            with sess.graph.as_default():
                K.set_session(sess)
                K.set_learning_phase(1)  # Set the learning phase to 'train'.
                swl_tf_util.train_neural_net(
                    sess, nnTrainer, train_image_patches,
                    train_label_patches, test_image_patches,
                    test_label_patches, batch_size, num_epochs, shuffle,
                    does_resume_training, train_saver, output_dir_path,
                    checkpoint_dir_path, train_summary_dir_path,
                    val_summary_dir_path)
        print('\tTotal training time = {}'.format(time.time() - start_time))

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                K.set_session(sess)
                K.set_learning_phase(0)  # Set the learning phase to 'test'.
                swl_tf_util.evaluate_neural_net(sess, nnEvaluator,
                                                test_image_patches,
                                                test_label_patches,
                                                batch_size, eval_saver,
                                                checkpoint_dir_path)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infer.

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            K.set_session(sess)
            K.set_learning_phase(0)  # Set the learning phase to 'test'.
            inferences = swl_tf_util.infer_by_neural_net(
                sess, nnInferrer, test_image_patches, batch_size,
                infer_saver, checkpoint_dir_path)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        # Fixed: the ground truth is test_label_patches; the previously used
        # name test_labels is not defined in this function (NameError).
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_label_patches, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_label_patches
        correct_estimation_count = np.count_nonzero(
            np.equal(inferences, groundtruths))
        print('\tAccurary = {} / {} = {}'.format(
            correct_estimation_count, groundtruths.size,
            correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #%%------------------------------------------------------------------

    print('[SWL] Info: Start inferring full-size images using patches...')
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            K.set_session(sess)
            K.set_learning_phase(0)  # Set the learning phase to 'test'.
            infer_full_size_images_from_patches(sess, nnInferrer, image_list,
                                                label_list, patch_height,
                                                patch_width, num_classes,
                                                batch_size,
                                                inference_dir_path)
    print('[SWL] Info: End inferrig full-size images using patches...')

    #print('[SWL] Info: Start visualizing filters...')
    #with infer_session.as_default() as sess:
    #    with sess.graph.as_default():
    #        K.set_session(sess)
    #        K.set_learning_phase(0)  # Set the learning phase to 'test'.
    #        visualize_filters(sess)
    #print('[SWL] Info: End visualizing filters...')

    print('[SWL] Info: Start visualizing activations...')
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            K.set_session(sess)
            K.set_learning_phase(0)  # Set the learning phase to 'test'.
            visualize_activations(sess, modelForInference, nnInferrer,
                                  image_list, patch_height, patch_width,
                                  num_classes, batch_size, npy_dir_path)
    print('[SWL] Info: End visualizing activations...')

    #--------------------
    # Close sessions.

    # Only the shared session exists, so only it is closed. The per-phase
    # variant would be:
    #if does_need_training:
    #    train_session.close()
    #    del train_session
    #    eval_session.close()
    #    del eval_session
    #infer_session.close()
    #del infer_session
    default_session.close()
    del default_session
def mnist_batch_manager(method=0):
    """Train, evaluate, and infer with an MNIST CNN using a chosen batch manager.

    Args:
        method: Selects how training batches are produced.
            0: AugmentationBatchManager without process pool.
            1: AugmentationBatchManager with process pool.
            2: AugmentationFileBatchManager without process pool.
            3: AugmentationFileBatchManager with process pool.
            4: ImgaugBatchManager with background processes.
            5: ImgaugFileBatchManager without background processes.
            Methods 2, 3 and 5 exchange batches through files in working
            directories ('./batch_dir' prefix); the others keep them in memory.

    Raises:
        ValueError: If `method` is not one of the values above. The check is
            done up front, before any directory, data, graph, or session is
            created, so an invalid argument no longer leaves open sessions
            behind.
    """
    in_memory_methods = (0, 1, 4)
    file_based_methods = (2, 3, 5)
    if method not in in_memory_methods + file_based_methods:
        raise ValueError(
            '[SWL] Error: Invalid batch manager method: {}.'.format(method))
    uses_file_batches = method in file_based_methods
    uses_process_pool = method in (1, 3)
    uses_imgaug_manager = method in (4, 5)

    #np.random.seed(7)

    #--------------------
    # Sets parameters.

    does_need_training = True
    does_resume_training = False

    output_dir_prefix = 'mnist_cnn'
    output_dir_suffix = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
    #output_dir_suffix = '20181211T172200'

    initial_epoch = 0

    image_height, image_width = 28, 28
    num_classes = 10
    input_shape = (None, image_height, image_width, 1)
    output_shape = (None, num_classes)

    batch_size = 128  # Number of samples per gradient update.
    num_epochs = 30  # Number of times to iterate over training data.
    shuffle = True
    is_label_augmented = False
    is_time_major = False
    is_sparse_output = False

    sess_config = tf.ConfigProto()
    #sess_config.device_count = {'GPU': 2}
    #sess_config.allow_soft_placement = True
    sess_config.log_device_placement = True
    sess_config.gpu_options.allow_growth = True
    #sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4  # Only allocate 40% of the total memory of each GPU.

    #--------------------
    # Prepares directories.

    output_dir_path = os.path.join(
        '.', '{}_{}'.format(output_dir_prefix, output_dir_suffix))
    checkpoint_dir_path = os.path.join(output_dir_path, 'tf_checkpoint')
    inference_dir_path = os.path.join(output_dir_path, 'inference')
    train_summary_dir_path = os.path.join(output_dir_path, 'train_log')
    val_summary_dir_path = os.path.join(output_dir_path, 'val_log')

    swl_util.make_dir(checkpoint_dir_path)
    swl_util.make_dir(inference_dir_path)
    swl_util.make_dir(train_summary_dir_path)
    swl_util.make_dir(val_summary_dir_path)

    #--------------------
    # Prepares data.

    train_images, train_labels, test_images, test_labels = load_data(
        input_shape[1:])

    #--------------------
    # Local helpers shared by the training, evaluation, and inference phases.

    def create_dir_manager():
        # Working-directory manager used by the file-based methods only.
        if not uses_file_batches:
            return None
        batch_dir_path_prefix = './batch_dir'
        num_batch_dirs = 5
        return WorkingDirectoryManager(batch_dir_path_prefix, num_batch_dirs)

    def create_test_batch_manager():
        # Non-shuffling, non-augmenting manager over the test split.
        if uses_file_batches:
            return SimpleFileBatchManager(
                test_images, test_labels, batch_size, False, is_time_major,
                image_file_format='val_batch_images_{}.npy',
                label_file_format='val_batch_labels_{}.npy')
        return SimpleBatchManager(test_images, test_labels, batch_size,
                                  False, is_time_major)

    def create_train_batch_manager(pool):
        # Augmenting manager over the training split for the chosen method.
        if uses_imgaug_manager:
            augmenter = get_imgaug_augmenter(image_height, image_width)
            if uses_file_batches:
                return ImgaugFileBatchManager(
                    augmenter, train_images, train_labels, batch_size,
                    shuffle, is_label_augmented, is_time_major,
                    image_file_format='train_batch_images_{}.npy',
                    label_file_format='train_batch_labels_{}.npy')
            return ImgaugBatchManager(augmenter, train_images, train_labels,
                                      batch_size, shuffle,
                                      is_label_augmented, is_time_major)

        #augmenter = IdentityAugmenter()
        augmenter = ImgaugAugmenter(image_height, image_width)
        if uses_file_batches:
            return AugmentationFileBatchManager(
                augmenter, train_images, train_labels, batch_size, shuffle,
                is_label_augmented, is_time_major, pool,
                image_file_format='train_batch_images_{}.npy',
                label_file_format='train_batch_labels_{}.npy')
        return AugmentationBatchManager(augmenter, train_images,
                                        train_labels, batch_size, shuffle,
                                        is_label_augmented, is_time_major,
                                        pool)

    def run_training(session, dir_mgr, pool):
        # Builds the batch managers, trains, and reports the elapsed time.
        train_batch_mgr = create_train_batch_manager(pool)
        val_batch_mgr = create_test_batch_manager()

        start_time = time.time()
        with session.as_default() as sess:
            with sess.graph.as_default():
                if uses_file_batches:
                    swl_tf_util.train_neural_net_by_file_batch_manager(
                        sess, nnTrainer, train_batch_mgr, val_batch_mgr,
                        dir_mgr, num_epochs, does_resume_training,
                        train_saver, output_dir_path, checkpoint_dir_path,
                        train_summary_dir_path, val_summary_dir_path,
                        is_time_major, is_sparse_output)
                else:
                    swl_tf_util.train_neural_net_by_batch_manager(
                        sess, nnTrainer, train_batch_mgr, val_batch_mgr,
                        num_epochs, does_resume_training, train_saver,
                        output_dir_path, checkpoint_dir_path,
                        train_summary_dir_path, val_summary_dir_path,
                        is_time_major, is_sparse_output)
        print('\tTotal training time = {}'.format(time.time() - start_time))

    #--------------------
    # Creates models, sessions, and graphs.

    # Creates graphs.
    if does_need_training:
        train_graph = tf.Graph()
        eval_graph = tf.Graph()
    infer_graph = tf.Graph()

    if does_need_training:
        with train_graph.as_default():
            # Creates a model.
            modelForTraining = create_mnist_cnn(input_shape, output_shape)
            modelForTraining.create_training_model()

            # Creates a trainer.
            nnTrainer = SimpleNeuralNetTrainer(modelForTraining,
                                               initial_epoch)

            # Creates a saver.
            #   Saves a model every 2 hours and maximum 5 latest models are saved.
            train_saver = tf.train.Saver(max_to_keep=5,
                                         keep_checkpoint_every_n_hours=2)

            initializer = tf.global_variables_initializer()

        with eval_graph.as_default():
            # Creates a model.
            modelForEvaluation = create_mnist_cnn(input_shape, output_shape)
            modelForEvaluation.create_evaluation_model()

            # Creates an evaluator.
            nnEvaluator = NeuralNetEvaluator(modelForEvaluation)

            # Creates a saver.
            eval_saver = tf.train.Saver()

    with infer_graph.as_default():
        # Creates a model.
        modelForInference = create_mnist_cnn(input_shape, output_shape)
        modelForInference.create_inference_model()

        # Creates an inferrer.
        nnInferrer = NeuralNetInferrer(modelForInference)

        # Creates a saver.
        infer_saver = tf.train.Saver()

    # Creates sessions.
    if does_need_training:
        train_session = tf.Session(graph=train_graph, config=sess_config)
        eval_session = tf.Session(graph=eval_graph, config=sess_config)
    infer_session = tf.Session(graph=infer_graph, config=sess_config)

    # Initializes.
    if does_need_training:
        train_session.run(initializer)

    #%%------------------------------------------------------------------
    # Trains and evaluates.

    if does_need_training:
        # The directory manager is created before the pool, as before.
        dirMgr = create_dir_manager()
        if uses_process_pool:
            # The pool must stay alive for the whole training run.
            with mp.Pool() as pool:
                run_training(train_session, dirMgr, pool)
        else:
            run_training(train_session, dirMgr, None)

        #--------------------
        # Fresh managers are built for evaluation, as in the original flow.
        dirMgr = create_dir_manager()
        valBatchMgr = create_test_batch_manager()

        start_time = time.time()
        with eval_session.as_default() as sess:
            with sess.graph.as_default():
                if uses_file_batches:
                    swl_tf_util.evaluate_neural_net_by_file_batch_manager(
                        sess, nnEvaluator, valBatchMgr, dirMgr, eval_saver,
                        checkpoint_dir_path, is_time_major, is_sparse_output)
                else:
                    swl_tf_util.evaluate_neural_net_by_batch_manager(
                        sess, nnEvaluator, valBatchMgr, eval_saver,
                        checkpoint_dir_path, is_time_major, is_sparse_output)
        print('\tTotal evaluation time = {}'.format(time.time() - start_time))

    #%%------------------------------------------------------------------
    # Infers.

    dirMgr = create_dir_manager()
    testBatchMgr = create_test_batch_manager()

    start_time = time.time()
    with infer_session.as_default() as sess:
        with sess.graph.as_default():
            if uses_file_batches:
                inferences = swl_tf_util.infer_by_neural_net_and_file_batch_manager(
                    sess, nnInferrer, testBatchMgr, dirMgr, infer_saver,
                    checkpoint_dir_path, is_time_major)
            else:
                inferences = swl_tf_util.infer_by_neural_net_and_batch_manager(
                    sess, nnInferrer, testBatchMgr, infer_saver,
                    checkpoint_dir_path, is_time_major)
    print('\tTotal inference time = {}'.format(time.time() - start_time))

    if inferences is not None:
        # Per-batch results are stacked into one array before scoring.
        inferences = np.vstack(inferences)
        if num_classes >= 2:
            inferences = np.argmax(inferences, -1)
            groundtruths = np.argmax(test_labels, -1)
        else:
            inferences = np.around(inferences)
            groundtruths = test_labels
        correct_estimation_count = np.count_nonzero(
            np.equal(inferences, groundtruths))
        print('\tAccurary = {} / {} = {}'.format(
            correct_estimation_count, groundtruths.size,
            correct_estimation_count / groundtruths.size))
    else:
        print('[SWL] Warning: Invalid inference results.')

    #--------------------
    # Closes sessions.

    if does_need_training:
        train_session.close()
        del train_session
        eval_session.close()
        del eval_session
    infer_session.close()
    del infer_session