train_batch_num = int(
    np.ceil(
        float(train_img_cnt) /
        (config_common.model_set["batch_size"] * len(gpu_device)))) - 2
val_batch_num = int(
    np.ceil(
        float(val_img_cnt) /
        (config_common.model_set["batch_size"] * len(gpu_device)))) - 2

with tf.Graph().as_default(), tf.device('/cpu:0'):
    # setting placeholders
    is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
    handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')

    # reshuffle the training list file at the start of every epoch
    shuffle_and_overwrite(config_common.data_set["train_file_path"])
    train_dataset = tf.data.TextLineDataset(
        config_common.data_set["train_file_path"])
    train_dataset = train_dataset.apply(
        tf.contrib.data.map_and_batch(
            lambda x: tf.py_func(
                parse_data,
                [x, classes_list, class_num,
                 config_common.model_set["image_size"], anchors, 'train',
                 config_common.data_set["data_save_path_temp"],
                 config_common.data_set["nfs_mount_path"], class_num_dic,
                 config_common.model_set["train_with_gray"],
                 config_common.data_set["fill_zero_label_names"]],
                [tf.float32, tf.float32, tf.float32, tf.float32, tf.string]),
            num_parallel_calls=config_common.model_set["num_threads"],
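# The pipeline above calls shuffle_and_overwrite() to reshuffle the
# annotation list file on disk before each epoch instead of using tf.data's
# shuffle. A minimal sketch of what such a helper typically does, assuming a
# plain text file with one sample per line; the repo's actual implementation
# may differ:
import random

def shuffle_and_overwrite(file_name):
    # read every annotation line, shuffle in memory, then write the file back
    with open(file_name, 'r') as f:
        lines = f.readlines()
    random.shuffle(lines)
    with open(file_name, 'w') as f:
        f.writelines(lines)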
help="Warm up training epoches.") args = parser.parse_args() args.anchors = parse_anchors(args.anchor_path) args.classes = read_class_names(args.class_name_path) args.class_num = len(args.classes) args.train_img_cnt = len(open(args.train_file, 'r').readlines()) args.val_img_cnt = len(open(args.val_file, 'r').readlines()) args.train_batch_num = int(np.ceil(float(args.train_img_cnt) / args.batch_size)) args.val_batch_num = int(np.ceil(float(args.val_img_cnt) / args.batch_size)) logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', filename=args.progress_log_path, filemode='w') # 设置日志 is_training = tf.placeholder(dtype=tf.bool, name="phase_train") handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag') # 数据处理 shuffle_and_overwrite(args.train_file) train_dataset = tf.data.TextLineDataset(args.train_file) train_dataset = train_dataset.apply(tf.contrib.data.map_and_batch( lambda x: tf.py_func(parse_data, [x, args.class_num, args.img_size, args.anchors, 'train'], [tf.float32, tf.float32, tf.float32, tf.float32]), num_parallel_calls=args.num_threads, batch_size=args.batch_size)) train_dataset = train_dataset.prefetch(args.prefetech_buffer) val_dataset = tf.data.TextLineDataset(args.val_file) val_dataset = val_dataset.apply(tf.contrib.data.map_and_batch( lambda x: tf.py_func(parse_data, [x, args.class_num, args.img_size, args.anchors, 'val'], [tf.float32, tf.float32, tf.float32, tf.float32]), num_parallel_calls=args.num_threads, batch_size=args.batch_size)) val_dataset.prefetch(args.prefetech_buffer) # creating two dataset iterators train_iterator = train_dataset.make_initializable_iterator() val_iterator = val_dataset.make_initializable_iterator() # creating two dataset handles
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename=args.progress_log_path,
                    filemode='w')

# setting placeholders
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')

##################
# tf.data pipeline
##################
# Selecting a `feedable iterator` to switch between training dataset and validation dataset.
# Manually shuffle the train txt file because tf.data.shuffle is very slow;
# you can google it for more details.
shuffle_and_overwrite(args.train_file)
train_dataset = tf.data.TextLineDataset(args.train_file)
train_dataset = train_dataset.apply(
    tf.data.experimental.map_and_batch(
        lambda x: tf.py_func(
            parse_data,
            [x, args.class_num, args.img_size, args.anchors, 'train'],
            [tf.float32, tf.float32, tf.float32, tf.float32]),
        num_parallel_calls=args.num_threads,
        batch_size=args.batch_size))
train_dataset = train_dataset.prefetch(args.prefetech_buffer)

val_dataset = tf.data.TextLineDataset(args.val_file)
val_dataset = val_dataset.apply(
    tf.data.experimental.map_and_batch(
        lambda x: tf.py_func(
            parse_data,
            [x, args.class_num, args.img_size, args.anchors, 'val'],
            [tf.float32, tf.float32, tf.float32, tf.float32]),
        num_parallel_calls=args.num_threads,
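# Note that the variants in this section target different TF 1.x releases:
# tf.contrib.data.map_and_batch was moved to tf.data.experimental.map_and_batch
# in later TF 1.x versions. A hedged compatibility shim, assuming TF 1.x:
try:
    map_and_batch = tf.data.experimental.map_and_batch  # newer TF 1.x
except AttributeError:
    map_and_batch = tf.contrib.data.map_and_batch  # older TF 1.x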
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename=flag.progress_log_path,
                    filemode='w')

# setting placeholders
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')

##################
# tf.data pipeline
##################
# Selecting a `feedable iterator` to switch between training dataset and validation dataset.
# Manually shuffle the train txt file because tf.data.shuffle is very slow;
# you can google it for more details.
shuffle_and_overwrite(flag.train_file)
train_dataset = tf.data.TextLineDataset(flag.train_file)
train_dataset = train_dataset.apply(
    tf.contrib.data.map_and_batch(
        lambda x: tf.py_func(
            double_parse_data,
            [x, flag.class_num, flag.img_size, flag.anchors, 'train'],
            [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32]),
        batch_size=flag.batch_size))
train_dataset = train_dataset.prefetch(flag.prefetech_buffer)

val_dataset = tf.data.TextLineDataset(flag.val_file)
val_dataset = val_dataset.apply(
    tf.contrib.data.map_and_batch(
        lambda x: tf.py_func(
            double_parse_data,
            [x, flag.class_num, flag.img_size, flag.anchors, 'val'],
            [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32]),
        batch_size=flag.batch_size))
# tf.data transformations return new datasets, so re-assign the result
val_dataset = val_dataset.prefetch(flag.prefetech_buffer)
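# A minimal sketch of driving such a feedable-iterator pipeline in a TF 1.x
# session, reusing the assumed handle/iterator names from the sketch after
# the second variant above; illustrative only, not these repos' train loop:
with tf.Session() as sess:
    # evaluate each iterator's string handle once
    train_handle_value = sess.run(train_handle)
    val_handle_value = sess.run(val_handle)

    # training pass: initialize the train iterator and feed its handle
    sess.run(train_iterator.initializer)
    image_batch = sess.run(image, feed_dict={handle_flag: train_handle_value})

    # validation pass: same graph, just a different handle
    sess.run(val_iterator.initializer)
    image_batch = sess.run(image, feed_dict={handle_flag: val_handle_value})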