Example #1
def estimator_dump(self):
    """In estimator mode: estim_spec = tf.estimator.EstimatorSpec(training_hooks=[estimator_dump()])

    :return: a tf_debug.DumpingDebugHook that dumps debug data to cfg.TF_DEBUG_DUMP_DIR.
    """
    from tensorflow.python import debug as tf_debug
    self._init()
    return tf_debug.DumpingDebugHook(cfg.TF_DEBUG_DUMP_DIR)
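A minimal sketch of how the returned hook plugs into an EstimatorSpec through training_hooks, as the docstring above describes (model_fn and _build_model are placeholder names on the same object, not part of the original snippet):

def model_fn(self, features, labels, mode, params):
    loss, train_op = self._build_model(features, labels)  # hypothetical helper
    training_hooks = []
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Attach the dumping hook so every training run writes a tfdbg dump directory.
        training_hooks.append(self.estimator_dump())
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, train_op=train_op, training_hooks=training_hooks)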
Example #2
def main(unused_argv):
    del unused_argv  # Unused

    tf.logging.set_verbosity(tf.logging.INFO)

    assert FLAGS.seq_len > 0
    assert FLAGS.perm_size > 0

    FLAGS.n_token = data_utils.VOCAB_SIZE
    tf.logging.info("n_token {}".format(FLAGS.n_token))

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

    # Get train input function
    train_input_fn, train_record_info_dict = get_input_fn("train")

    tf.logging.info("num of batches {}".format(
        train_record_info_dict["num_batch"]))

    # Get train cache function
    train_cache_fn = get_cache_fn(FLAGS.mem_len)

    ##### Get model function
    model_fn = get_model_fn()

    ##### Create TPUEstimator
    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    # TPU Estimator
    estimator = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        train_cache_fn=train_cache_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params={"track_mean": FLAGS.track_mean},
        train_batch_size=FLAGS.train_batch_size,
        eval_on_tpu=FLAGS.use_tpu)

    hooks = None
    if FLAGS.debug:
        if FLAGS.debug_dump_dir:
            hooks = [tf_debug.DumpingDebugHook(FLAGS.debug_dump_dir)]
        else:
            hooks = [tf_debug.LocalCLIDebugHook()]
    #### Training
    estimator.train(input_fn=train_input_fn,
                    max_steps=FLAGS.train_steps,
                    hooks=hooks)
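The snippet above uses tf_debug and two debug flags without showing their definitions; it assumes something along these lines at module level (hypothetical flag definitions, the real ones live elsewhere in the training script):

from absl import flags
from tensorflow.python import debug as tf_debug

flags.DEFINE_bool("debug", default=False,
                  help="Attach a tfdbg hook during training.")
flags.DEFINE_string("debug_dump_dir", default="",
                    help="If set, dump tfdbg data to this directory; "
                         "otherwise open the local CLI debugger.")
FLAGS = flags.FLAGS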
Example #3
  def process(self, query):
    """Returns the visualizations for query.

    Args:
      query: The query to process.

    Returns:
      A dictionary of results with processing and graph visualizations.
    """
    tf.logging.info("Processing new query [%s]" %query)

    # Create the new TFDBG hook directory.
    hook_dir = "/tmp/t2t_server_dump/request_%d" %int(time.time())
    os.makedirs(hook_dir)
    hooks = [tfdbg.DumpingDebugHook(hook_dir, watch_fn=topk_watch_fn)]

    # TODO(kstevens): This is extremely hacky and slow for responding to
    # queries.  Figure out a reasonable way to pre-load the model weights before
    # forking and run queries through the estimator quickly.
    def server_input_fn():
      """Generator that returns just the current query."""
      for _ in range(1):
        input_ids = self.source_vocab.encode(query)
        input_ids.append(text_encoder.EOS_ID)
        x = [1, 100, len(input_ids)] + input_ids
        x += [0] * (self.const_array_size - len(x))
        d = {
            "inputs": np.array(x).astype(np.int32),
            "problem_choice": np.array(0).astype(np.int32)
        }
        yield d

    def input_fn():
      """Generator that returns just the current query."""
      gen_fn = decoding.make_input_fn_from_generator(server_input_fn())
      example = gen_fn()
      # TODO(kstevens): Make this method public
      # pylint: disable=protected-access
      return decoding._interactive_input_tensor_to_features_dict(
          example, self.hparams)

    # Make the prediction for the current query.
    result_iter = self.estimator.predict(input_fn, hooks=hooks)
    result = None
    for result in result_iter:
      break

    # Extract the beam search information by reading the dumped TFDBG event
    # tensors.  We first read and record the per-step beam sequences, then the
    # beam scores.  Afterwards we align the two sets of values to create the
    # full graph vertices and edges.
    decoding_graph = graph.Graph()
    run_dirs = sorted(glob.glob(os.path.join(hook_dir, "run_*")))
    for run_dir in run_dirs:
      # Record the different completed and active beam sequence ids.
      alive_sequences = deque()
      finished_sequences = deque()

      # Make the root vertex since it always needs to exist.
      decoding_graph.get_vertex(sequence_key([0]))

      # Create the initial vertices and edges for the active and finished
      # sequences.  We uniquely define each vertex using its full sequence path
      # as a string to ensure there are no collisions when the same step has
      # two instances of an output id.
      dump_dir = tfdbg.DebugDumpDir(run_dir, validate=False)
      seq_datums = dump_dir.find(predicate=seq_filter)
      for seq_datum in seq_datums:
        sequences = np.array(seq_datum.get_tensor()).astype(int)[0]
        if "alive" in seq_datum.node_name:
          alive_sequences.append(sequences)
        if "finished" in seq_datum.node_name:
          finished_sequences.append(sequences)

        for sequence in sequences:
          pieces = self.targets_vocab.decode_list(sequence)
          index = sequence[-1]
          if index == 0:
            continue

          parent = decoding_graph.get_vertex(sequence_key(sequence[:-1]))
          current = decoding_graph.get_vertex(sequence_key(sequence))

          edge = decoding_graph.add_edge(parent, current)
          edge.data["label"] = pieces[-1]
          edge.data["label_id"] = index
          # Coerce the type to be a python bool.  Numpy bools can't be easily
          # converted to JSON.
          edge.data["completed"] = bool(index == 1)

      # Examine the score results and store the scores with the associated edges
      # in the graph.  We fetch the vertices (and relevant edges) by looking
      # into the saved beam sequences stored above.
      score_datums = dump_dir.find(predicate=scores_filter)
      for score_datum in score_datums:
        if "alive" in score_datum.node_name:
          sequences = alive_sequences.popleft()

        if "finished" in score_datum.node_name:
          sequences = finished_sequences.popleft()

        scores = np.array(score_datum.get_tensor()).astype(float)[0]
        for i, score in enumerate(scores):
          sequence = sequences[i]
          if sequence[-1] == 0:
            continue

          vertex = decoding_graph.get_vertex(sequence_key(sequence))
          edge = decoding_graph.edges[vertex.in_edges[0]]
          edge.data["score"] = score
          edge.data["log_probability"] = score
          edge.data["total_log_probability"] = score

    # Delete the hook dir to save disk space
    shutil.rmtree(hook_dir)

    # Create the graph visualization data structure.
    graph_vis = {
        "visualization_name": "graph",
        "title": "Graph",
        "name": "graph",
        "search_graph": decoding_graph.to_dict(),
    }

    # Create the processing visualization data structure.
    # TODO(kstevens): Make this method public
    # pylint: disable=protected-access
    output_ids = decoding._save_until_eos(result["outputs"].flatten(), False)
    output_pieces = self.targets_vocab.decode_list(output_ids)
    output_token = [{"text": piece} for piece in output_pieces]
    output = self.targets_vocab.decode(output_ids)

    source_steps = [{
        "step_name": "Initial",
        "segment": [{
            "text": query
        }],
    }]

    target_steps = [{
        "step_name": "Initial",
        "segment": output_token,
    }, {
        "step_name": "Final",
        "segment": [{
            "text": output
        }],
    }]

    processing_vis = {
        "visualization_name": "processing",
        "title": "Processing",
        "name": "processing",
        "query_processing": {
            "source_processing": source_steps,
            "target_processing": target_steps,
        },
    }

    return {
        "result": [processing_vis, graph_vis],
    }
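The method above relies on several helpers defined elsewhere in the server (topk_watch_fn, seq_filter, scores_filter, sequence_key). Hypothetical minimal versions, just to make the data flow concrete (the substrings and regex are guesses, not the real implementation):

def topk_watch_fn(fetches, feeds):
  # Restrict what the DumpingDebugHook records to the beam search tensors.
  del fetches, feeds  # unused
  return tfdbg.WatchOptions(node_name_regex_whitelist=r".*(alive|finished).*")

def seq_filter(datum, tensor):
  # Matches dumped tensors that hold the per-step beam sequences.
  del tensor  # unused
  return "seq" in datum.node_name

def scores_filter(datum, tensor):
  # Matches dumped tensors that hold the per-step beam scores.
  del tensor  # unused
  return "scores" in datum.node_name

def sequence_key(sequence):
  # A vertex is keyed by its full output-id path, as described in the comments above.
  return " ".join(str(int(i)) for i in sequence)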
Example #4
                           train_monitors=hooks,
                           eval_hooks=hooks)

ex.train()
accuracy_score = ex.evaluate()["accuracy"]


# In[ ]:


#python -m tensorflow.python.debug.examples.debug_tflearn_iris  --use_experiment --debug


# In[ ]:


# Let your BUILD target depend on "//tensorflow/python/debug:debug_py"
# (You don't need to worry about the BUILD dependency if you are using a pip
#  install of open-source TensorFlow.)
from tensorflow.python import debug as tf_debug

hooks = [tf_debug.DumpingDebugHook("/shared/storage/location/tfdbg_dumps_1")]


# In[ ]:


#python -m tensorflow.python.debug.cli.offline_analyzer \
#    --dump_dir="/shared/storage/location/tfdbg_dumps_1/run_<epoch_timestamp_microsec>_<uuid>"
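The same kind of dump directory can also be produced outside Estimator hooks: for plain tf.Session code, tfdbg provides a non-interactive session wrapper that writes one run_<epoch_timestamp_microsec>_<uuid> directory per Session.run() call. A minimal sketch (the graph and fetches are placeholders):


# In[ ]:


import tensorflow as tf
from tensorflow.python import debug as tf_debug

sess = tf.Session()
sess = tf_debug.DumpingDebugWrapperSession(
    sess, "/shared/storage/location/tfdbg_dumps_1")
# Each sess.run(...) now writes a dump directory that the offline_analyzer
# command above can open.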

Example #5
def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    # Path to file specifying all runtime and model parameters and how to process user command line input.
    config_file_path = os.path.join(PROJECT_MODEL_ROOT, "configs/default.json")

    # Argparse namespace combining json defaults and user command line inputs
    args = estimator_utils.init_basic_argument_parser(config_file_path)
    # Transfer all k:v pairs from the Argparse namespace to HParams
    hparams = tf.contrib.training.HParams(**vars(args))
    # Print stats about the current run
    print_run_info(args)

    # Calculate the number of steps needed to complete one epoch for each of the subsets
    steps_in_epoch_train = np.ceil(args.num_samples["train"] /
                                   args.train_batch_size)
    steps_in_epoch_val = np.ceil(args.num_samples["validation"] /
                                 args.validation_batch_size)

    # Number of training steps to perform during train_and_evaluate
    total_train_steps = int(steps_in_epoch_train * args.num_epochs)
    # Minimum number of steps during which no early stopping can occur
    train_steps_without_stopping = steps_in_epoch_train * args.train_epochs_without_stopping
    # Number of steps during which no metric improvement happened that is needed to initiate early stopping
    max_train_steps_without_improvement = int(
        steps_in_epoch_train * args.max_train_epochs_without_improvement)
    # Number of evaluation steps that are performed during each of the calls to evaluation during train_and_evaluate
    eval_steps_during_train = int(steps_in_epoch_val *
                                  args.eval_pc_during_train)
    # Number of steps during which evaluation is not performed
    train_steps_without_evaluation = int(steps_in_epoch_train *
                                         args.delay_evaluation_epochs)

    throttle_secs = args.save_checkpoints_secs
    save_checkpoints_steps = None
    # Exactly one of the two checkpoint-frequency settings (seconds or steps) may be set
    assert (args.save_checkpoints_secs
            is not None) ^ (args.checkpoint_freq_epochs is not None)
    if args.checkpoint_freq_epochs is not None:
        save_checkpoints_steps = np.ceil(
            steps_in_epoch_train *
            args.checkpoint_freq_epochs)  # TODO Ensure this is never zero
        throttle_secs = 1

    # Number of towers
    num_shards = args.num_gpu if args.num_gpu > 0 else 1

    # Path object pointing to the location where the checkpoints and results are saved
    # If model path is provided then load a previously instantiated model and train/evaluate
    # using the previous values.

    folder_naming_vars = []
    for x in args.folder_naming_vars:
        # eval() needs main()'s local scope here; a list comprehension (which has
        # its own scope in Python 3) would hide those names from eval().
        folder_naming_vars.append(eval(x))

    execution_date = (time.strftime("%Y%b%d", time.localtime())
                      if args.execution_date is None else args.execution_date)

    # SageMaker provides model_dir; when running elsewhere, create a new model_dir or load a previous run via model_path
    if hparams.model_dir is None:
        model_dir = retrieve_model_dir(args.log_dir_path, args.model_path,
                                       execution_date, *folder_naming_vars)
        hparams.set_hparam("model_dir", model_dir)
        setattr(args, "model_dir", model_dir)

    # Path pointing to the location of the current data set (e.g. .../numpy/lastfm_10_pc)
    data_dir = os.path.join(
        args.data_dir_path if args.data_dir_path else "",
        "" if args.exec_loc == "sagemaker" else args.dataset,
        "tfrecords" if args.input_data_format == "tfrecords" else "",
        "sharded" if args.exec_loc == "sagemaker" else "")

    # Tensorflow device allocation settings
    config_proto = tf.ConfigProto(
        allow_soft_placement=args.allow_soft_placement,
        log_device_placement=args.log_device_placement)
    config_proto.gpu_options.allow_growth = True

    # Object specifying current run settings e.g. logging frequency and num of check points saved.
    run_config = tf.estimator.RunConfig(
        tf_random_seed=args.tf_random_seed,
        model_dir=args.model_dir,
        session_config=config_proto,
        save_summary_steps=20,
        save_checkpoints_steps=save_checkpoints_steps
        if not args.overwrite else 1,
        save_checkpoints_secs=args.save_checkpoints_secs,
        keep_checkpoint_max=args.keep_checkpoint_max,
        log_step_count_steps=100,
    )

    # Instantiate an Estimator object with the model_fn from this module.
    estimator = estimator_model.create_estimator(run_config, hparams)

    # The degree of shuffling as an int. See tf.data.Dataset.shuffle() for details.
    shuffle_train = int(args.num_samples["train"] *
                        args.shuffle_train) if args.shuffle_train else 1
    shuffle_val = int(args.num_samples["val"] *
                      args.shuffle_test) if args.shuffle_test else 1

    additional_arrays = ["weights"] if args.use_weights else []

    # https://cloud.google.com/blog/products/gcp/easy-distributed-training-with-tensorflow-using-tfestimatortrain-and-evaluate-on-cloud-ml-engine
    with tf.name_scope("TrainSpec_and_hook"):
        with tf.name_scope("Early_stop_hook"):
            try:
                os.makedirs(estimator.eval_dir())
            except FileExistsError:
                pass

            training_hooks = []

            early_stopping_hook = estimator_utils.make_early_stopping_hook(
                estimator=estimator,
                metric_name=args.key_metrics[0],
                max_train_steps_without_improvement=
                max_train_steps_without_improvement,
                min_steps=train_steps_without_stopping,
                run_every_secs=None,
                run_every_steps=1)
            if args.early_stopping:
                training_hooks.append(early_stopping_hook)

            # from https://stackoverflow.com/questions/45719176/how-to-display-runtime-statistics-in-tensorboard-using-estimator-api-in-a-distri
            if args.metadata_hook_saving_frequency:
                runtime_stats_hook = estimator_utils.MetadataHook(
                    save_secs=args.metadata_hook_saving_frequency,
                    output_dir=str(args.model_dir))
                training_hooks.append(runtime_stats_hook)

            if args.profiler_hook:
                profiler_hook = tf.train.ProfilerHook(
                    save_steps=10,
                    save_secs=None,
                    output_dir=str(os.path.join(args.model_dir, "timelines")),
                    show_memory=True)
                training_hooks.append(profiler_hook)

            # Debugging
            if args.tensorboard_debug_address:
                debug_hook = tf_debug.TensorBoardDebugHook(
                    args.tensorboard_debug_address)
                training_hooks.append(debug_hook)
            if args.debug:
                # Attach both the interactive CLI hook and a dumping hook.
                training_hooks.append(tf_debug.LocalCLIDebugHook())
                training_hooks.append(
                    tf_debug.DumpingDebugHook(args.debug_dump_path))

        with tf.name_scope("TrainSpec"):
            train_spec = tf.estimator.TrainSpec(
                input_fn=lambda: estimator_model.input_fn(
                    data_dir=data_dir,
                    subset="train",
                    num_shards=num_shards,
                    batch_size=args.train_batch_size,
                    X_cols_to_use=args.X_cols_to_use,
                    input_data_format=args.input_data_format,
                    shuffle=shuffle_train,
                    additional_arrays=additional_arrays,
                    delta_t_mean=args.delta_t_mean,
                    delta_t_std=args.delta_t_std),
                max_steps=total_train_steps if not args.overwrite else 10,
                hooks=training_hooks)

    with tf.name_scope("EvalSpec_and_exporter"):
        with tf.name_scope("Exporter"):
            # TODO Define a function to process the input (e.g. the sequence for the whole user); this function is used to simulate real data
            exporters = []
            for key_metric in args.key_metrics:
                exporters.append(
                    tf.estimator.BestExporter(
                        name=key_metric,
                        serving_input_receiver_fn=estimator_model.serving_input_fn(args),
                        compare_fn=estimator_checkpointing.custom_checkpoint_compare_fn(
                            default_key=key_metric),
                        exports_to_keep=1,
                        as_text=False))

        with tf.name_scope("EvalSpec"):
            eval_spec = tf.estimator.EvalSpec(
                input_fn=lambda: estimator_model.input_fn(
                    data_dir=data_dir,
                    subset="validation",
                    num_shards=num_shards,
                    batch_size=args.validation_batch_size,
                    X_cols_to_use=args.X_cols_to_use,
                    input_data_format=args.input_data_format,
                    shuffle=shuffle_val,
                    additional_arrays=additional_arrays,
                    delta_t_mean=args.delta_t_mean,
                    delta_t_std=args.delta_t_std),
                exporters=exporters if args.use_exporter else None,  #TODO
                steps=eval_steps_during_train if not args.overwrite else 1,
                throttle_secs=throttle_secs,
                start_delay_secs=args.start_delay_secs)

    if train_steps_without_evaluation > 0:
        print(
            "Starting preliminary training for {} steps during which no evaluation is performed."
            .format(train_steps_without_evaluation))
        estimator.train(input_fn=lambda: estimator_model.input_fn(
            data_dir=data_dir,
            subset="train",
            num_shards=num_shards,
            batch_size=args.train_batch_size,
            X_cols_to_use=args.X_cols_to_use,
            input_data_format=args.input_data_format,
            shuffle=shuffle_train,
            additional_arrays=additional_arrays,
            delta_t_mean=args.delta_t_mean,
            delta_t_std=args.delta_t_std),
                        max_steps=train_steps_without_evaluation
                        if not args.overwrite else 10,
                        hooks=training_hooks)
        # Export the model in case the validation metrics do not improve after the
        # first run, in which case no export would otherwise be performed.
        export_dir = os.path.join(args.model_dir, "export",
                                  args.key_metrics[0])
        estimator.export_savedmodel(export_dir,
                                    estimator_model.serving_input_fn(args),
                                    strip_default_attrs=True)

    print(
        "Starting Train and Evaluate for {} training steps with Evaluation every {} second(s) or {} steps for {} evaluation steps."
        .format(total_train_steps, throttle_secs, save_checkpoints_steps,
                eval_steps_during_train))

    with tf.name_scope("Train_and_Evaluate"):
        tf.estimator.train_and_evaluate(estimator=estimator,
                                        train_spec=train_spec,
                                        eval_spec=eval_spec)
    if args.exec_loc == "sagemaker":
        updated_model_path = estimator_sagemaker.sagemaker_postprocessing(args)
        predictor_param_names = [
            "predictor_s3_input_path", "predictor_s3_output_path",
            "predictor_batch_size"
        ]
        predictor_params = [getattr(args, x) for x in predictor_param_names]
        if np.all([x is not None for x in predictor_params]):
            estimator_sagemaker.predict_s3_numpy(
                saved_model_path=updated_model_path,
                input_s3_path=args.predictor_s3_input_path,
                output_s3_path=args.predictor_s3_output_path,
                batch_size=args.predictor_batch_size)
    else:

        # Evaluate trained model
        steps_in_epoch_test = np.ceil(args.num_samples["test"] /
                                      args.validation_batch_size)
        shuffle_test = args.num_samples["test"] if args.shuffle_test else 1

        with tf.name_scope("Evaluate_trained_model"):

            train_input_fn = lambda: estimator_model.input_fn(
                data_dir=data_dir,
                subset="train",
                num_shards=num_shards,  # Switch to one and adjust bs/num_gpu for single device
                batch_size=args.train_batch_size,  # TODO Does that work for serving
                X_cols_to_use=args.X_cols_to_use,
                input_data_format=args.input_data_format,
                shuffle=shuffle_train,
                additional_arrays=additional_arrays,
                delta_t_mean=args.delta_t_mean,
                delta_t_std=args.delta_t_std)

            test_input_fn = lambda: estimator_model.input_fn(
                data_dir=data_dir,
                subset="test",
                num_shards=num_shards,
                batch_size=args.validation_batch_size,
                X_cols_to_use=args.X_cols_to_use,
                input_data_format=args.input_data_format,
                shuffle=shuffle_test,
                additional_arrays=additional_arrays,
                delta_t_mean=args.delta_t_mean,
                delta_t_std=args.delta_t_std)

            if not args.final_eval_multiple_models:

                # Find best checkpoint and its associated metrics
                best_checkpoint_path, best_checkpoint_metrics = estimator_checkpointing.best_checkpoint(
                    model_dir=args.model_dir,
                    eval_dir=estimator.eval_dir(),
                    metric=args.key_metrics[0])
                print("Best checkpoint: {}".format(best_checkpoint_path))
                print("Best metrics: {}".format(best_checkpoint_metrics))

                # Remove model_dir from previous run_config as that causes evaluation to ignore warm_start_from
                eval_run_config = deepcopy(run_config)
                setattr(eval_run_config, "_model_dir", None)

                # New estimator restarted with best result for user-specified metric
                estimator = estimator_model.create_estimator(
                    eval_run_config,
                    hparams,
                    warm_start_from=best_checkpoint_path)

                train_results = estimator.evaluate(input_fn=train_input_fn,
                                                   steps=steps_in_epoch_train)
                print("Final evaluation on train subset: {}".format(
                    train_results))

                test_results = estimator.evaluate(input_fn=test_input_fn,
                                                  steps=steps_in_epoch_test)
                print(
                    "Final evaluation on test subset: {}".format(test_results))

            else:
                estimator_checkpointing.evaluate_multiple_checkpoints(
                    model_dir=args.model_dir,
                    eval_dir=estimator.eval_dir(),
                    num_checkpoints=args.keep_checkpoint_max,
                    metric=args.key_metrics[0],
                    input_fn=test_input_fn,
                    run_config=run_config,
                    hparams=hparams,
                    num_steps_in_eval=steps_in_epoch_test
                    if not args.overwrite else 1)

        if args.clear_checkpoints:

            rm_graph_command = "for f in $(find {} -name 'graph.pbtxt'); do rm $f; done".format(
                str(args.model_dir))
            rm_checkpoints_command = "for f in $(find {} -name 'model.ckpt-*'); do rm $f; done".format(
                str(args.model_dir))

            process = subprocess.run(rm_graph_command, shell=True, check=True)
            process = subprocess.run(rm_checkpoints_command,
                                     shell=True,
                                     check=True)

            print("Cleared model_dir: {}".format(str(model_dir)))
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--watch_gpu', required=True, type=int,
                        help="GPU id to watch; set it to the same id as the visible GPU")
    parser.add_argument('--debug', default=True, type=bool)
    parser.add_argument('--stop_globalstep', default=1000, type=int)
    parser.add_argument('--checkpoint_dir', default="checkpoint_dir", type=str)
    parser.add_argument('--task_index', default=0, type=int)
    
    prof_save_step = cfg.PROFILER_SAVE_STEP #120
    sum_save_step = cfg.SUMMARY_SAVE_STEP #500
    FLAGS, unparsed = parser.parse_known_args()
    
    initial_learning_rate = cfg.LEARNING_RATE
    decay_steps = cfg.DECAY_STEPS
    decay_rate = cfg.DECAY_RATE
    staircase = cfg.STAIRCASE
    
    ########################dir#######################################
    singlepipe_dir = "Single_Pipe_train_logs"
    if not os.path.exists(singlepipe_dir):
        os.makedirs(singlepipe_dir)
    
    inside_bsnQnM_dir = "Single_Pipe"+cfg.BS_NT_MUL_PREFIX
    logrootpath = os.path.join(singlepipe_dir, inside_bsnQnM_dir)
    if not os.path.exists(logrootpath):
        os.makedirs(logrootpath)
    
    fpslog_name = "Single_Pipe" + cfg.BS_NT_MUL_PREFIX + "fps_log.txt"
    concated_path = os.path.join(logrootpath, fpslog_name)

    checkpoint_dir = FLAGS.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    gpulog_name = "Single_Pipe"+"gpu"+str(FLAGS.watch_gpu)+cfg.BS_NT_MUL_PREFIX+"_gpulog.txt"

    ###########################gpusubprocess##############################
    def start_gpulog(path, fname):
        # Has to be called before the start of training.
        gpuinfo_path = path + "/" + fname
        # Create/truncate the log file that nvidia-smi will write to.
        with open(gpuinfo_path, 'w'):
            pass
        argument = 'timestamp,count,gpu_name,gpu_bus_id,memory.total,memory.used,utilization.gpu,utilization.memory'
        try:
            proc = subprocess.Popen(
                ['nvidia-smi --format=csv --query-gpu=%s %s %s %s' % (argument, ' -l 1', '-i ' + str(FLAGS.watch_gpu), '-f ' + gpuinfo_path)],
                shell=True)
        except KeyboardInterrupt:
            try:
                proc.terminate()
            except OSError:
                pass
            proc.wait()
        return proc

    #########################pipeline###################################
    tf.reset_default_graph()
    
    image_producer = Pascal_voc('train')
    
    (current_index, image, label) = image_producer.get_one_image_label_element()
    current_index = tf.Print(current_index, data=[current_index],
                     message="CURRENT INDEX OF IMAGE IS :")

    image_shape = (image_producer.image_size, image_producer.image_size, 3)

    label_size = (image_producer.cell_size, image_producer.cell_size, 25)  # possible value is 0 or 1

    processed_queue = tf.FIFOQueue(
        capacity=int(image_producer.batch_size * 1.5),
        shapes=[image_shape, label_size],
        dtypes=[tf.float32, tf.float32],
        name='processed_queue')

    enqueue_processed_op = processed_queue.enqueue([image, label])

    num_enqueue_threads = min(image_producer.num_enqueue_threads, image_producer.gt_labels_length)

    queue_runner = tf.train.QueueRunner(processed_queue, [enqueue_processed_op] * num_enqueue_threads)
    tf.train.add_queue_runner(queue_runner)

    (images, labels) = processed_queue.dequeue_many(image_producer.batch_size)
    if FLAGS.debug:
        images = tf.Print(images, data=[processed_queue.size()],
                          message="Worker %d get_batch(), BatchSize %d, Queues left:" % (
                              FLAGS.task_index, cfg.BATCH_SIZE))

    #########################graph###################################
    
    with tf.device("/device:GPU:"+str(FLAGS.watch_gpu)):
        yolo = YOLONet(images, labels)
        
        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(
            initial_learning_rate, global_step, decay_steps,
            decay_rate, staircase, name='learning_rate')
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train_op = slim.learning.create_train_op(
            yolo.total_loss, optimizer, global_step=global_step)
        
    #########################hook#####################################
    
    profiler_hook = tf.train.ProfilerHook(save_steps=prof_save_step, output_dir=logrootpath,
                                          show_memory=True, show_dataflow=True)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.train.SummarySaverHook(save_steps=sum_save_step, output_dir=logrootpath, summary_op=summary_op)
    debug_hook = tf_debug.DumpingDebugHook("debug_dir")
    if FLAGS.debug:
        tensors_to_log = [global_step, yolo.total_loss]

        def formatter(curvals):
            # LoggingTensorHook logs whatever string the formatter returns.
            return "Global step %d, Loss %f!" % (
                curvals[global_step], curvals[yolo.total_loss])

        logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=10, formatter=formatter)
        hooks = [debug_hook, tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), logging_hook, profiler_hook, summary_hook]
    else:
        hooks = [debug_hook, tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), profiler_hook, summary_hook]

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    # config.gpu_options.allocator_type = 'BFC'
    # config.gpu_options.per_process_gpu_memory_fraction = 0.8
    proc = start_gpulog(logrootpath, gpulog_name)

    #######################train#####################################
    
    print('Start training ...')
    with tf.train.MonitoredTrainingSession(config=config, hooks=hooks, checkpoint_dir=checkpoint_dir, save_checkpoint_secs=3600) as sess:
    
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        
        start_global_step_value = sess.run(global_step)
        timer = Timer(start_global_step_value)

        iters_per_toc = 20
        txtForm = "Training speed: local avg %f fps, global %f fps, loss %f, global step: %d, predict to wait %s"
        local_max_iter = FLAGS.stop_globalstep - start_global_step_value
        
        timer.tic()
        yolo_loss, global_step_value, _ = sess.run([yolo.total_loss, global_step, train_op])
        n = 1
        while not sess.should_stop():
            n = n + 1
            if n > 0 and n % iters_per_toc == 0:
                local_avg_fps, global_avg_fps = timer.toc(iters_per_toc, global_step_value)
                timetowait = timer.remain(n, local_max_iter)

                txtData = local_avg_fps, global_avg_fps, yolo_loss, global_step_value, timetowait
                print(txtForm % txtData)

                with open(concated_path, 'a+') as log:
                    log.write("%.4f,%.4f,%.4f,%d,%s\n" % txtData)

                timer.tic()
    
            yolo_loss, global_step_value, _ = sess.run([yolo.total_loss, global_step, train_op])
        
        coord.request_stop()
        coord.join(threads)
        
    print('Done training.')

    try:
        proc.terminate()
    except OSError:
        print("Kill subprocess failed. Kill nvidia-smi manually.")