Example #1
def main(_):
    # check the running platform
    if platform.uname()[1] != 'dragonx-H97N-WIFI':
        print("Now it knows it's in a remote cluster")
        FLAGS.system = "remote"
        FLAGS.data_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR/train"
        FLAGS.vis_path = "/home/lxiaol9/ARC/EASTRNN/vis/LSTM/"
        FLAGS.save_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/LSTM_east/" + now.strftime("%Y%m%d-%H%M%S")
        FLAGS.video_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR2013/train/"
        FLAGS.checkpoints_path = FLAGS.save_path
        FLAGS.pretrained_model_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/east/20180921-173054/"
    print("############## Step1: The environment path has been set up ###############")
    gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))
    config = get_config(FLAGS)
    config.batch_size = 8
    config.num_layers = 3
    config.num_steps = 10
Example #2
    flags.DEFINE_boolean("source", False, "whether load data from source")
    flags.DEFINE_boolean("dis_plt", False,
                         "whether using pyplot real-time display ")
    flags.DEFINE_integer('save_checkpoint_steps', 1000, '')
    flags.DEFINE_integer('save_summary_steps', 100, '')
    flags.DEFINE_string(
        'pretrained_model_path',
        '/media/dragonx/DataStorage/ARC/EASTRNN/weights/EAST/resnet_v1_50.ckpt',
        '')
    flags.DEFINE_string('geometry', 'RBOX', 'text box geometry: RBOX or QUAD')
    FLAGS = flags.FLAGS

    save_path = '/media/dragonx/DataStorage/ARC/EASTRNN/checkpoints/LSTM/'
    #train_input = DetectorInputMul(save_path, 1, 2, 0)
    print("data has been loaded")
    config = get_config(FLAGS)
    # Global initializer for Variables in the model
    gpu_options = tf.GPUOptions(allow_growth=True)
    #global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    # log: May 3rd, we need to adapt the model input, with config
    with tf.name_scope("Train"):
        # use placeholder to stand for input and targets
        initializer = tf.random_normal_initializer()
        x_train = tf.placeholder(tf.float32,
                                 shape=[None, config.num_steps, None, None, 3])
        m = ArrayModel(True,
                       config,
                       x_train,
                       reuse_variables=None,
                       initializer=initializer)
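    # Note: the input placeholder is 5-D, [batch, num_steps, height, width, channels],
    # with batch and spatial dims left as None so only the clip length is fixed.
    # reuse_variables=None lets this "Train" copy create fresh variables; a "Val"
    # copy built later would pass reuse_variables=True to share the same weights.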
    with tf.name_scope("Val"):
Example #3
def main(_):
    # check the running platform
    if platform.uname()[1] != 'dragonx-H97N-WIFI':
        print("Now it knows it's in a remote cluster")
        FLAGS.system = "remote"
        FLAGS.data_path = "/home/lxiaol9/ARC/EASTRNN/data/GAP_process/"
        FLAGS.vis_path = "/home/lxiaol9/ARC/EASTRNN/vis/LSTM/"
        FLAGS.save_path = "/home/lxiaol9/ARC/EASTRNN/checkpoints/LSTM/" + now.strftime(
            "%Y%m%d-%H%M%S")
        FLAGS.video_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR/train/"
        FLAGS.checkpoints_path = "/home/lxiaol9/ARC/EASTRNN/weights/EAST/east_icdar2015_resnet_v1_50_rbox/"
        FLAGS.pretrained_model_path = "/home/lxiaol9/ARC/EASTRNN/weights/EAST/resnet_v1_50.ckpt"
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path")
    print(
        "############## Step1: The environment path has been set up ###############"
    )
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))
    config = get_config(FLAGS)
    config.batch_size = 1
    with tf.Graph().as_default():
        # Global initializer for Variables in the model
        initializer = tf.random_normal_initializer()
        # Construct the model graph
        with tf.name_scope("Train"):
            initializer = tf.random_normal_initializer()
            # use placeholder to stand for input and targets
            x_train = tf.placeholder(
                tf.float32, shape=[None, config.num_steps, None, None, 3])
            model = ArrayModel(True,
                               config,
                               x_train,
                               reuse_variables=None,
                               initializer=initializer)
        # ======================== initialize from the saved weights ============================#
        if platform.uname()[1] != 'dragonx-H97N-WIFI':
            checkpoint_path = "/home/lxiaol9/ARC/EASTRNN/checkpoints/LSTM/"
        else:
            checkpoint_path = "/media/dragonx/DataStorage/ARC/EASTRNN/checkpoints/ARC/checkpoints/LSTM/"
        if not os.path.exists(checkpoint_path):
            raise RuntimeError(
                'Checkpoint `{}` not found'.format(checkpoint_path))
        saver = tf.train.Saver()
        # restore the model from weights
        session = tf.Session(config=tf.ConfigProto(allow_soft_placement=False))
        model_path = os.path.join(checkpoint_path, '20180818-170857-11100')
        logger.info('Restore from {}'.format(model_path))
        saver.restore(session, model_path)
        print(
            "##############Step 2: Weight restoring successfully ################"
        )
        ################### load  all data into memory ###################
        if FLAGS.source:
            datapath = FLAGS.data_path
            test_input = HeatInputMul(datapath, 1, 12, 2)
            input = DetectorInputMul(
                datapath, 1, 12,
                1)  # datapath, video_start, video_end, dimension
        else:
            datapath = FLAGS.data_path
            test_input = HeatInputMul(datapath, 1, 12, 0)
            input = DetectorInputMul(
                datapath, 1, 12,
                1)  # we will use input.targets[videos, frames, vect]
        print(
            "##############Step 3: Heatmap, GT data is ready now################"
        )
        ################### choose video and frame to test ###############
        i = test_input.video_name.index('Video_37_2_3')
        iters = 0.0
        state = session.run(model.initial_state)
        # tensors dict
        fetches = {
            #"cost": m.cost,
            "final_state": model.final_state,
            # "loss": m.loss,
            "heat_map_pred": model.heatmap_predict
        }
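        # `fetches` maps names to graph tensors; session.run returns the matching
        # numpy values. final_state is fetched so it can be fed back as the next
        # chunk's initial_state, keeping the LSTM stateful across chunks.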
        # frames sequence: 0, 1, 2;3, 4, 5;6, 7, 8
        for step in range(int((test_input.cnt_frame[i] - 1) / 3)):
            feed_dict = {}
            data = np.zeros([
                config.batch_size, config.num_steps, config.shape[0],
                config.shape[1], 3
            ],
                            dtype=np.float32)
            heat_maps = np.zeros([
                config.batch_size, config.num_steps,
                int(config.shape[0] / 4),
                int(config.shape[1] / 4), 1
            ],
                                 dtype=np.float32)
            # randomly choosing starting frame
            frame_set = []
            video_file = FLAGS.video_path + test_input.video_name[i] + '.mp4'
            gt_path = FLAGS.data_path + test_input.video_name[i] + '/gt/'
            # frame number to choose
            j = step * 3
            cap = cv2.VideoCapture(video_file)
            for m in range(config.num_steps):
                cap.set(1, (j + m))
                ret, frame = cap.read()
                data[0, m, :, :, :] = cv2.resize(
                    frame, (config.shape[0], config.shape[1]))
                heat_maps[0, m, :, :, 0] = cv2.resize(
                    np.squeeze(
                        np.load(gt_path + 'frame' +
                                '{0:03d}'.format(j + m + 1) + '.npy')),
                    (128, 128))
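            # cap.set(1, ...) sets CAP_PROP_POS_FRAMES (property id 1), seeking to an
            # absolute frame index before each read; the ground-truth .npy files are
            # 1-indexed, hence the j + m + 1 offset.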
            # print('choosing starting frame %d, with num_steps is %d' % (j, config.num_steps))
            cap.release()
            frame_set.append(j)

            feed_dict[model.input_data] = data
            feed_dict[model.input_heat_maps] = heat_maps
            for layer, (c, h) in enumerate(model.initial_state):
                feed_dict[c] = state[layer].c
                feed_dict[h] = state[layer].h
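            # each layer's LSTMStateTuple (c, h) is fed separately; `state` holds the
            # values returned from the previous session.run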
            vals = session.run(fetches, feed_dict=feed_dict)
            state = vals["final_state"]
            heat_map_pred = vals["heat_map_pred"]
            print(len(heat_map_pred))
            #
            iters += config.num_steps
            npyname1 = FLAGS.vis_path + 'Video_37_2_3/' + 'frame' + format(
                j, '03d')
            npyname2 = FLAGS.vis_path + 'Video_37_2_3/' + 'frame' + format(
                j + 1, '03d')
            npyname3 = FLAGS.vis_path + 'Video_37_2_3/' + 'frame' + format(
                j + 2, '03d')
            np.save(npyname1, heat_map_pred[0])
            np.save(npyname2, heat_map_pred[1])
            np.save(npyname3, heat_map_pred[2])
            print("saving frame at %d" % (j))
def main():
    checkpoint_path = '/media/dragonx/DataStorage/ARC/EAST/checkpoints/LSTM_east/'
    idname1 = '20180924-191410'
    idname2 = '20180924-191410-5001'
    test_data_path = '/media/dragonx/DataLight/ICDAR2013/test/'
    save_path = '/media/dragonx/DataLight/ICDAR2013/test_results_lstm/'
    filename = '/media/dragonx/DataLight/ICDAR2013/test/Video_6_3_2.mp4'
    idx = 0  # initial frame number
    config = get_config(FLAGS)
    config.batch_size = 1
    config.num_layers = 3
    config.num_steps = 10
    #>>>>>>>>>>>>>>>>>>>>>>Sort test video>>>>>>>>>>>>>>>>>>>>>>>>>>>#
    video_set = []
    for root, dirs, files in os.walk(test_data_path):
        for file in files:
            if file.endswith('.mp4'):
                video_set.append(os.path.splitext(file)[0])
    index = range(0, 1)
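    # only the first video in video_set is evaluated here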
    # parser for running outside
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--checkpoint-path', default=checkpoint_path)
    # args = parser.parse_args()
    if not os.path.exists(checkpoint_path):
        raise RuntimeError('Checkpoint `{}` not found'.format(checkpoint_path))

    logger.info('loading model')
    #>>>>>>>>>>>>>>>>>>>>>>> Loading Model >>>>>>>>>>>>>>>>>>>>>>>>>#
    gpu_options = tf.GPUOptions(allow_growth=True)
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    # global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

    # Global initializer for Variables in the model
    # log: May 3rd, we need to adapt the model input, with config
    # with tf.name_scope("Train"):
    #     # use placeholder to stand for input and targets
    #     initializer = tf.random_normal_initializer()
    #     x_train = tf.placeholder(tf.float32, shape=[None, config.num_steps, None, None, 3])
    #     m = ArrayModel(True, config, x_train, reuse_variables=None, initializer=initializer)
    with tf.name_scope("Val"):
        # use placeholder to stand for input and targets
        initializer = tf.random_normal_initializer()
        x_val = tf.placeholder(tf.float32,
                               shape=[None, config.num_steps, None, None, 3])
        model = ArrayModel(False,
                           config,
                           x_val,
                           reuse_variables=None,
                           initializer=initializer)
    var_total = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    print(var_total)
    #>>>>>>>>>>>>>>>>>>>>>>>> restore the model from weights>>>>>>>>#
    soft_placement = False
    # var_list1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='feature_fusion')
    # var_list2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50')
    # var_list3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='multi_rnn_cell')
    # var_list4 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred_module')
    # var_list = var_list1 + var_list2 + var_list3 + var_list4
    # saver = tf.train.Saver({v.op.name: v for v in var_list})
    saver = tf.train.Saver()
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    # with sv.managed_session(config=config_proto) as session:
    #     if FLAGS.restore:
    #         print('continue training from previous checkpoint')
    #         # ckpt = tf.train.latest_checkpoint(FLAGS.checkpoints_path)
    #         ckpt = checkpoint_path + idname1 + '/' + idname2
    #         sv.saver.restore(session, ckpt)
    model_path = checkpoint_path + idname1 + '/' + idname2
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    logger.info('Restore from {}'.format(model_path))
    saver.restore(sess, model_path)
    #>>>>>>>>>>>>>>>>>>>>>>Start evaluation>>>>>>>>>>>>>>>>>>>>>>>>>#
    P_test = []
    R_test = []
    f1_test = []
    for k in index:
        P_video = []
        R_video = []
        f1_video = []
        video_save = save_path + video_set[k] + idname1 + '_' + idname2 + '.avi'
        t_start = time.time()
        # sort up all the paths
        xml_solo_path = test_data_path + video_set[k]
        raw_video_path = test_data_path + video_set[k] + '.mp4'
        cap = cv2.VideoCapture(raw_video_path)
        frame_width = int(cap.get(3))
        frame_height = int(cap.get(4))
        cnt_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        out = cv2.VideoWriter(video_save,
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))
        # 1. load both polys and tags; 2. generate geo maps (the formats of polys and tags need to match)
        polys_array_list, tags_array_list, id_list_list, frame_num = load_annotations_solo(xml_solo_path, \
                    1, cnt_frame, frame_width, frame_height)
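        # load_annotations_solo presumably returns one polys/tags array per frame for
        # frames 1..cnt_frame, aligned with the frames read from the video below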
        #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>loop over frames in the time steps >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
        for i in range(int(cnt_frame / config.num_steps)):
            data_seq = np.zeros((1, config.num_steps, 512, 512, 3),
                                dtype=np.float32)
            data_original = np.zeros(
                (1, config.num_steps, frame_height, frame_width, 3),
                dtype=np.float32)
            for j in range(config.num_steps):
                ret, frame = cap.read()
                # im_resized = cv2.resize(frame, (int(512), int(512)))
                im_resized = frame[0:512, 0:512, :]
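                # Note: this takes a top-left 512x512 crop rather than the resize shown
                # in the commented line above, even though ratio_h/ratio_w further down
                # are computed as if the frame had been resized.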
                data_original[0, j, :, :, :] = frame
                data_seq[0, j, :, :, :] = im_resized
            #>>>>>>>>>>>>>>>>>>>>>>>>>Now it's time to run the model>>>>>>>>>>>>>>>>>>>>>>>>>>
            state = sess.run(model.initial_state)
            # tensors dict to run
            fetches = {
                "score_map": model.score_map_set,
                "geometry_map": model.geometry_set
            }
            feed_dict = {}
            feed_dict[model.input_data] = data_seq
            for layer, (c, h) in enumerate(model.initial_state):
                feed_dict[c] = state[layer].c
                feed_dict[h] = state[layer].h
            timer = collections.OrderedDict([('net', 0), ('restore', 0),
                                             ('nms', 0)])
            start = time.time()
            vals = sess.run(fetches, feed_dict=feed_dict)
            timer['net'] = time.time() - start
            #>>>>>>>>>>>>>>>>>>>>>>>>Okay!!! We can evaluate the results now>>>>>>>>>>>>>>>>>>>
            for j in range(config.num_steps):
                rtparams = collections.OrderedDict()
                rtparams['start_time'] = datetime.datetime.now().isoformat()
                rtparams['image_size'] = '{}x{}'.format(
                    frame_width, frame_height)
                # im_resized, (ratio_h, ratio_w) = resize_image(img)
                ratio_h, ratio_w = 512 / frame_height, 512 / frame_width
                rtparams['working_size'] = '{}x{}'.format(512, 512)
                # results refinement via NMS
                score = vals["score_map"][j]
                geometry = vals["geometry_map"][j]
                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                logger.info(
                    'net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))
                if boxes is not None:
                    scores = boxes[:, 8].reshape(-1)
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
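                # boxes are in 512x512 working coordinates; dividing x by ratio_w and
                # y by ratio_h maps them back to the original frame size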

                duration = time.time() - start
                timer['overall'] = duration
                logger.info('[timing] {}'.format(duration))
                text_lines = []
                if boxes is not None:
                    text_lines = []
                    for box, score in zip(boxes, scores):
                        box = sort_poly(box.astype(np.int32))
                        if np.linalg.norm(box[0] -
                                          box[1]) < 5 or np.linalg.norm(
                                              box[3] - box[0]) < 5:
                            continue
                        tl = collections.OrderedDict(
                            zip([
                                'x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'
                            ], map(float, box.flatten())))
                        tl['score'] = float(score)
                        text_lines.append(tl)
                pred = {
                    'text_lines': text_lines,
                    'rtparams': rtparams,
                    'timing': timer,
                }
                text_polys, text_tags = polys_array_list[
                    i * config.num_steps + j], tags_array_list[i * config.num_steps + j]
                text_polys, text_tags = check_and_validate_polys(
                    text_polys, text_tags, (frame_height, frame_width))
                # out.write(new_img)
                #>>>>>>>>>>>>>>>>>>>>>>>>Evaluation>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
                targets = text_polys
                precision, recall, f1 = eval_single_frame(targets, pred)
                P_video.append(precision)
                R_video.append(recall)
                f1_video.append(f1)
                img = data_original[0, j, :, :, :]
                new_img = draw_illu(img.copy(), pred)
                new_img1 = draw_illu_gt(new_img.copy(), targets, precision,
                                        recall, f1)
                out.write(new_img1)
                # using for pre-testing
                if j == 0 and FLAGS.vis:
                    fig1 = plt.figure(figsize=(20, 10))
                    fig1.add_subplot(1, 2, 1)
                    plt.imshow(new_img)
                    plt.title("Text Detection with fine-tuned EAST")
                    fig1.add_subplot(1, 2, 2)
                    plt.imshow(new_img1)
                    plt.title('Text Detection Results Comparison')
                    plt.show()
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
                # time.sleep(.100)
            else:
                continue
            break  # reached only when 'q' was pressed in the inner loop
            # evaluation on ret and gt
        P_test.append(np.array(P_video, dtype=np.float32))
        R_test.append(np.array(R_video, dtype=np.float32))
        f1_test.append(np.array(f1_video, dtype=np.float32))
        print(P_video)
        print(R_video)
        print(f1_video)
        print("testing results are P:{}, R:{}, F1:{} on ".format(
            sum(P_video) / cnt_frame,
            sum(R_video) / cnt_frame,
            sum(f1_video) / cnt_frame) + video_set[k])
        cap.release()
        out.release()  # results refinement via NMS
        cv2.destroyAllWindows()
    print('here is the precision')
    for item in P_test:
        print(np.mean(item))
    print('here is the recall')
    for item in R_test:
        print(np.mean(item))
    print('here is the f-score')
    for item in f1_test:
        print(np.mean(item))
    print(video_set)
def main(argv=None):
    m_cfg = sys_cfg()
    config = get_config(FLAGS)
    config.batch_size = FLAGS.batch_size_per_gpu * FLAGS.num_gpus
    config.num_layers = 3
    config.num_steps = 5
    #
    eval_config = get_config(FLAGS)
    eval_config.batch_size = 2
    eval_config.num_layers = 3
    eval_config.num_steps = 5
    #============================ I. Model options ==============================#
    #>>>>>>>>>>>>>>>for PWCnet module network
    nn_opts = deepcopy(_DEFAULT_PWCNET_VAL_OPTIONS)
    if FLAGS.flownet_type == 'small':
        nn_opts['use_dense_cx'] = False
        nn_opts['use_res_cx'] = False
        nn_opts['pyr_lvls'] = 6
        nn_opts['flow_pred_lvl'] = 2
        nn_opts['ckpt_path'] = '/work/cascades/lxiaol9/ARC/PWC/checkpoints/pwcnet-sm-6-2-multisteps-chairsthingsmix/pwcnet.ckpt-592000'  # model to eval
    else:
        nn_opts['use_dense_cx'] = True
        nn_opts['use_res_cx'] = True
        nn_opts['pyr_lvls'] = 6
        nn_opts['flow_pred_lvl'] = 2
        nn_opts['ckpt_path'] = '/work/cascades/lxiaol9/ARC/PWC/checkpoints/pwcnet-lg-6-2-multisteps-chairsthingsmix/pwcnet.ckpt-595000'

    nn_opts['verbose'] = True
    nn_opts['batch_size'] = 32  # batch size per GPU
    nn_opts['use_tf_data'] = False  # don't use the tf.data reader for this simple task
    nn_opts['gpu_devices'] = ['/device:GPU:0', '/device:GPU:1']
    nn_opts['controller'] = '/device:CPU:0'  # Evaluate on CPU or GPU?
    nn_opts['adapt_info'] = (1, 436, 1024, 2)
    nn_opts['x_shape'] = [2, 512, 512,
                          3]  # image pairs input shape [2, H, W, 3]
    nn_opts['y_shape'] = [512, 512, 2]  # u,v flows output shape [H, W, 2]
    #>>>>>>>>>>>>>>>> For EAST module network
    east_opts = {
        'verbose': True,
        'ckpt_path': FLAGS.pretrained_model_path,
        'batch_size': 40,
        'batch_size_per_gpu': 20,
        'gpu_devices': ['/device:GPU:0', '/device:GPU:1'],
        # controller device to put the model's variables on (usually, /cpu:0 or /gpu:0 -> try both!)
        'controller': '/device:CPU:0',
        'x_dtype': tf.float32,  # input image dtype
        'x_shape': [512, 512, 3],  # input image shape [H, W, 3]
        'y_score_shape': [128, 128, 1],  # score map output shape [H/4, W/4, 1]
        'y_geometry_shape': [128, 128, 5],  # RBOX geometry output shape [H/4, W/4, 5]
        'x_mask_shape': [128, 128, 1]  # training mask shape [H/4, W/4, 1]
    }
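    # EAST outputs at 1/4 of the input resolution (512 -> 128): a 1-channel text
    # score map and, for RBOX, a 5-channel geometry map (4 edge distances + angle);
    # the training mask blanks out "don't care" regions in the loss.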
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    #=============================== II. building graph for east + agg =================================#
    # 1.1 Input placeholders
    batch_size = FLAGS.batch_size_per_gpu * FLAGS.num_gpus
    len_seq = FLAGS.num_steps
    # input_images = tf.placeholder(tf.float32, shape=[batch_size*len_seq, 512, 512, 3], name='input_images')
    input_feat_maps = tf.placeholder(tf.float32,
                                     shape=[batch_size, len_seq, 128, 128, 32],
                                     name='input_feature_maps')
    input_flow_maps = tf.placeholder(
        tf.float32,
        shape=[batch_size, len_seq - 1, 128, 128, 2],
        name='input_flow_maps')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[batch_size, len_seq, 128, 128, 1],
                                      name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(
            tf.float32,
            shape=[batch_size, len_seq, 128, 128, 5],
            name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(
            tf.float32,
            shape=[batch_size, len_seq, 128, 128, 8],
            name='input_geo_maps')
    input_training_masks = tf.placeholder(
        tf.float32,
        shape=[batch_size, len_seq, 128, 128, 1],
        name='input_training_masks')
    # 1.2 lr & opt
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=500,
                                               decay_rate=0.8,
                                               staircase=True)
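    # With staircase=True this is a step decay:
    #   lr(step) = FLAGS.learning_rate * 0.8 ** floor(step / 500)
    # i.e. 0.8x at step 500, 0.64x at step 1000, and so on.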
    opt = tf.train.AdamOptimizer(learning_rate)
    # 1.3 add summary
    tf.summary.scalar('learning_rate', learning_rate)
    # tf.summary.image('input_images', input_images[2:20:5, :, :, :])
    # 1.4 build graph in tf
    # input_images_split     = tf.split(input_images, FLAGS.num_gpus)
    input_flow_maps_split = tf.split(input_flow_maps, FLAGS.num_gpus)
    input_feature_split = tf.split(input_feat_maps, FLAGS.num_gpus)
    input_score_maps_split = tf.split(input_score_maps, FLAGS.num_gpus)
    input_geo_maps_split = tf.split(input_geo_maps, FLAGS.num_gpus)
    input_training_masks_split = tf.split(input_training_masks, FLAGS.num_gpus)
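    # tf.split cuts each tensor along axis 0 (the batch dim) into num_gpus equal
    # chunks, so batch_size must be divisible by FLAGS.num_gpus.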

    tower_grads = []
    reuse_variables = None
    tvars = []
    gpus = list(range(len(FLAGS.gpu_list.split(','))))
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_feature_split[i]
                ifms = input_flow_maps_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                # model changed to recurrent one, we only need the recurrent loss returned
                total_loss, model_loss = model_gru_agg.tower_loss(
                    iis,
                    ifms,
                    isms,
                    igms,
                    itms,
                    gpu_id=gpu_id,
                    config=config,
                    reuse_variables=reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True
                # tvar1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='tiny_embed')
                # tvar2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred_module')
                # tvars = tvar1 + tvar2
                # , var_list=tvars
                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)
    # 1.5 gradient averaging across towers
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
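    # average_gradients merges the per-tower (gradient, variable) lists by averaging
    # each variable's gradients across GPUs, the usual synchronous multi-tower
    # data-parallel pattern.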
    # 1.6 get training operations
    summary_op = tf.summary.merge_all()
    # variable_averages = tf.train.ExponentialMovingAverage(
    #     FLAGS.moving_average_decay, global_step)
    # variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')
    # 1.8 Saver & Session & Restore
    saver = tf.train.Saver(tf.global_variables())
    # sv = tf.train.Supervisor()
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())
    init = tf.global_variables_initializer()
    g = tf.get_default_graph()
    with g.as_default():
        config1 = tf.ConfigProto()
        config1.gpu_options.allow_growth = True
        config1.allow_soft_placement = True
        sess1 = tf.Session(config=config1)
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = FLAGS.prev_checkpoint_path
            saver.restore(sess1, ckpt)
        else:
            sess1.run(init)
            var_list1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='multi_rnn_cell')
            # var_list2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred_module')
            var_list_part1 = var_list1
            saver_alter1 = tf.train.Saver(
                {v.op.name: v
                 for v in var_list_part1})
            # # var_list3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='tiny_embed')
            # # var_list4 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='pred_module')
            # # var_list_part2 = var_list3 + var_list4
            # # saver_alter2 = tf.train.Saver({v.op.name: v for v in var_list_part2})
            print('continue training from previous weights')
            ckpt1 = FLAGS.prev_checkpoint_path
            print('Restore from {}'.format(ckpt1))
            saver_alter1.restore(sess1, ckpt1)
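            # only the multi_rnn_cell variables are restored here; everything else
            # keeps its fresh initialization from sess1.run(init)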
            # # print('continue training from previous Flow weights')
            # # ckpt2 = FLAGS.prev_checkpoint_path
            # # print('Restore from {}'.format(ckpt2))
            # # saver_alter2.restore(sess1, ckpt2)


    #============================= III. Other necessary components before training =============================#
    print("Step 1: AGG model has been reconstructed")
    GPUtil.showUtilization()
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>> EAST model >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> #
    east_net = model_flow_east.EAST(mode='test', options=east_opts)
    print("Step 2: EAST model has been reconstructed")
    GPUtil.showUtilization()
    # >>>>>>>>>>>>>>>>>>>>>>>>>>>>> PWCnet model >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>#
    nn = ModelPWCNet(mode='test', options=nn_opts)
    print("Step 3: PWC model has been reconstructed")
    GPUtil.showUtilization()
    train_data_generator = icdar_smart.get_batch_seq(
        num_workers=FLAGS.num_readers, config=config, is_training=True)
    # val_data_generator = icdar.get_batch_seq(num_workers=FLAGS.num_readers, config=eval_config, is_training=False)
    start = time.time()
    #============================= IV. Training over Steps(!!!)================================================#
    print("Now we're starting training!!!")
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)
    for step in range(FLAGS.max_steps):
        #>>>>>>>>>>>>> data
        if FLAGS.mode == "debug":
            data = []
            data.append(
                np.ones((config.batch_size, FLAGS.num_steps, 512, 512, 3),
                        dtype=np.float32))
            data.append(
                np.ones((batch_size, len_seq, 128, 128, 1), dtype=np.float32))
            data.append(
                np.ones((batch_size, len_seq, 128, 128, 5), dtype=np.float32))
            data.append(
                np.ones((batch_size, len_seq, 128, 128, 1), dtype=np.float32))
        else:
            data = next(train_data_generator)

        if step < 10:
            print("Data ready!!!")
            plt.figure(dpi=300)
            ax = plt.subplot(121)
            plt.imshow(data[1][0][0, :, :, 0])
            plt.title("score map")
            ax = plt.subplot(122)
            plt.imshow(data[3][0][0, :, :, 0] * 255)
            plt.title("training mask")
            print("saving figure")
            plt.savefig("/home/lxiaol9/debug/running/" + str(step) + ".png")
        east_feed = np.reshape(data[0], [-1, 512, 512, 3])
        target_frame = np.reshape(
            np.array(data[0])[:, 0:4, :, :, :], [-1, 512, 512, 3])
        source_frame = np.reshape(
            np.array(data[0])[:, 1:5, :, :, :], [-1, 512, 512, 3])
        flow_feed = np.concatenate((source_frame[:, np.newaxis, :, :, :],
                                    target_frame[:, np.newaxis, :, :, :]),
                                   axis=1)
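        # With num_steps = 5, target_frame holds frames 0..3 and source_frame frames
        # 1..4, so each stacked pair is (frame t+1, frame t); PWCNet presumably
        # estimates the backward flow used to warp features onto the earlier frame.
        # flow_feed shape: [batch * (num_steps - 1), 2, 512, 512, 3].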
        # >>>>>>>>>>>>>>>>>>>>>>>>>>> feature extraction with EAST >>>>>>>>>>>>>>>>>>>>>>>> #
        rounds = int(east_feed.shape[0] / east_opts['batch_size'])
        feature_stack = []
        flow_maps_stack = []
        for r in range(rounds):
            feature_stack.append(
                east_net.sess.run(
                    [east_net.y_hat_test_tnsr],
                    feed_dict={
                        east_net.x_tnsr:
                        east_feed[r * east_opts['batch_size']:(r + 1) *
                                  east_opts['batch_size'], :, :, :]
                    })[0][0])
        feature_maps = np.concatenate(feature_stack, axis=0)
        feature_maps_reshape = np.reshape(feature_maps,
                                          [-1, config.num_steps, 128, 128, 32])
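        # The EAST pass yields 32-channel shared feature maps at 128x128, regrouped
        # per clip as [batch, num_steps, 128, 128, 32] to match input_feat_maps.
        # Note: rounds = floor(frames / batch_size) silently drops any remainder.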
        #>>>>>>>>>>>>>>> flow estimation with PWCnet
        # x: [batch_size,2,H,W,3] uint8; x_adapt: [batch_size,2,H,W,3] float32
        x_adapt, x_adapt_info = nn.adapt_x(flow_feed)
        if x_adapt_info is not None:
            y_adapt_info = (x_adapt_info[0], x_adapt_info[2], x_adapt_info[3],
                            2)
        else:
            y_adapt_info = None
        mini_batch = nn_opts['batch_size'] * nn.num_gpus
        rounds = int(flow_feed.shape[0] / mini_batch)
        for r in range(rounds):
            feed_dict = {
                nn.x_tnsr:
                x_adapt[r * mini_batch:(r + 1) * mini_batch, :, :, :, :]
            }
            y_hat = nn.sess.run(nn.y_hat_test_tnsr, feed_dict=feed_dict)
            if FLAGS.mode == "debug":
                print(
                    "Step 5: now finish running one round of PWCnet for flow estimation"
                )
                GPUtil.showUtilization()
            y_hats, _ = nn.postproc_y_hat_test(
                y_hat, y_adapt_info)  # suppose to be [batch, height, width, 2]
            flow_maps_stack.append(y_hats[:, 1::4, 1::4, :] / 4)
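            # y_hats[:, 1::4, 1::4, :] subsamples the 512x512 flow field to 128x128;
            # dividing by 4 rescales the displacement vectors to the coarser grid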
        flow_maps = np.concatenate(flow_maps_stack, axis=0)
        # print("flow maps has shape ", flow_maps.shape[:])
        flow_maps = np.reshape(flow_maps,
                               [-1, FLAGS.num_steps - 1, 128, 128, 2])
        #>>>>>>>>>>>>>>> running training session
        with g.as_default():
            ml, tl, _ = sess1.run([model_loss, total_loss, train_op], \
                                        feed_dict={input_feat_maps: feature_maps_reshape,
                                                   input_score_maps: data[1],
                                                   input_geo_maps: data[2],
                                                   input_training_masks: data[3],
                                                   input_flow_maps: flow_maps
                                                   })
            if FLAGS.mode == "debug":
                print("Step 6: running one round on training!!!")
                GPUtil.showUtilization()
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break
            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess1,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess1.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_feat_maps: feature_maps_reshape,
                        input_score_maps: data[1],
                        input_geo_maps: data[2],
                        input_training_masks: data[3],
                        input_flow_maps: flow_maps
                    })
                summary_writer.add_summary(summary_str, global_step=step)
Example #6
def main(_):
    # check the running platform
    if platform.uname()[1] != 'dragonx-H97N-WIFI':
        print("Now it knows it's in a remote cluster")
        FLAGS.system = "remote"
        FLAGS.data_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR/train"
        FLAGS.vis_path = "/home/lxiaol9/ARC/EASTRNN/vis/LSTM/"
        FLAGS.save_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/LSTM_east/" + now.strftime(
            "%Y%m%d-%H%M%S")
        FLAGS.video_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR2015/train/"
        FLAGS.checkpoints_path = FLAGS.save_path
        FLAGS.pretrained_model_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/east/20180921-135717/"
    print(
        "############## Step1: The environment path has been set up ###############"
    )
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))
    # Model parameters using config
    config = get_config(FLAGS)
    config.batch_size = 1
    config.num_layers = 3
    config.num_steps = 10
    eval_config = get_config(FLAGS)
    eval_config.batch_size = 1
    eval_config.num_layers = 3
    eval_config.num_steps = 10
    with tf.Graph().as_default():
        # Global initializer for Variables in the model
        # log: May 3rd, we need to adapt the model input, with config
        with tf.name_scope("Train"):
            # use placeholder to stand for input and targets
            initializer = tf.random_normal_initializer()
            x_train = tf.placeholder(
                tf.float32, shape=[None, config.num_steps, None, None, 3])
            m = ArrayModel(True,
                           config,
                           x_train,
                           reuse_variables=None,
                           initializer=initializer)
            print("finished Training model generation")
            training_score = tf.summary.image(
                'score_map', m.input_score_maps[0, :, :, :, :])
            training_score_pred = tf.summary.image(
                'score_map_pred',
                tf.stack(m.score_map_set, 0) * 255)
            training_cost_sum = tf.summary.scalar("Loss", m.cost)
            training_lr = tf.summary.scalar("Learning_Rate", m.lr)
            training_input = tf.summary.image('input_images',
                                              m.input_data[0, :, :, :, :])
            training_loss_aabb = tf.summary.scalar('geometry_AABB',
                                                   m.loss_aabb)
            training_loss_theta = tf.summary.scalar('geometry_theta',
                                                    m.loss_theta)
            training_loss_cls = tf.summary.scalar('classification_dice_loss',
                                                  m.loss_cls)
            m.summary_merged = tf.summary.merge([
                training_lr, training_cost_sum, training_loss_aabb,
                training_loss_theta, training_loss_cls, training_input,
                training_score, training_score_pred
            ])
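            # merging returns a single op so one session.run emits all of the
            # training scalars and images above at once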
        with tf.name_scope("Val"):
            # use placeholder to stand for input and targets
            initializer = tf.random_normal_initializer()
            x_val = tf.placeholder(
                tf.float32, shape=[None, eval_config.num_steps, None, None, 3])
            mvalid = ArrayModel(True,
                                eval_config,
                                x_val,
                                reuse_variables=True,
                                initializer=initializer)
            val_cost_sum = tf.summary.scalar("Loss", mvalid.cost)
            val_score = tf.summary.image(
                'score_map', mvalid.input_score_maps[0, :, :, :, :])
            val_score_pred = tf.summary.image(
                'score_map_pred',
                tf.stack(mvalid.score_map_set, 0) * 255)
            val_input = tf.summary.image('input_images',
                                         mvalid.input_data[0, :, :, :, :])
            val_loss_aabb = tf.summary.scalar('geometry_AABB',
                                              mvalid.loss_aabb)
            val_loss_theta = tf.summary.scalar('geometry_theta',
                                               mvalid.loss_theta)
            val_loss_cls = tf.summary.scalar('classification_dice_loss',
                                             mvalid.loss_cls)
            mvalid.summary_merged = tf.summary.merge([
                val_cost_sum, val_score, val_loss_aabb, val_loss_theta,
                val_loss_cls, val_score_pred, val_input
            ])

        # Now that the models are ready, create a dictionary to store their computational graphs
        models = {"Train": m}
        # Module 2
        print("#############Step 2: models has been built############")
        for name, model in models.items():
            model.export_ops(name)
        metagraph = tf.train.export_meta_graph()
        soft_placement = False
        # we could also do coding in parallel
        if FLAGS.num_gpus > 1:
            soft_placement = True
            util.auto_parallel(metagraph, m)
        # if FLAGS.pretrained_model_path is not None:
        #     checkpoint_path = FLAGS.pretrained_model_path
        #     ckpt_state = tf.train.get_checkpoint_state(checkpoint_path )
        #     ckpt = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
        #     # 1. one way is to wrap original weights and change the variable names
        #     variables_to_restore = slim.get_variables_to_restore(exclude=['fc6', 'fc7', 'fc8'])
        #     variable_restore_op = slim.assign_from_checkpoint(ckpt, slim.get_trainable_variables(),
        #                                                          ignore_missing_vars=True)
        # Sep-18th, Try using a different saver to only initialize part of the model
        var_list1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='feature_fusion')
        var_list2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='resnet_v1_50')
        var_list = var_list1 + var_list2
        saver_alter = tf.train.Saver({v.op.name: v for v in var_list})
        config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
        #########################changes made up here######################
        train_data_generator = icdar.get_batch_seq(
            num_workers=FLAGS.num_readers, config=config, is_training=True)
        val_data_generator = icdar.get_batch_seq(num_workers=FLAGS.num_readers,
                                                 config=eval_config,
                                                 is_training=False)
        print(
            "##############Step 3: Heatmap, GT data is ready now################"
        )
        sv = tf.train.Supervisor()
        with sv.managed_session(config=config_proto) as session:
            if FLAGS.restore:
                print('continue training from previous checkpoint')
                ckpt = tf.train.latest_checkpoint(FLAGS.checkpoints_path)
                sv.saver.restore(session, ckpt)
            elif FLAGS.partially_restore:
                print('continue training from previous EAST checkpoint')
                ckpt = FLAGS.pretrained_model_path + 'model.ckpt-56092'
                logger.info('Restore from {}'.format(ckpt))
                saver_alter.restore(session, ckpt)
            else:
                if FLAGS.pretrained_model_path is not None:
                    variable_restore_op(session)
            train_writer = tf.summary.FileWriter(FLAGS.save_path + '/train/',
                                                 session.graph)
            val_writer = tf.summary.FileWriter(FLAGS.save_path + '/val/')
            print("###########Step 4 : start training. ###########")
            for i in range(config.max_steps):
                lr_decay = config.lr_decay**max(i + 1 - config.max_steps, 0.0)
                m.assign_lr(session, FLAGS.learning_rate * lr_decay)
                data_train = next(train_data_generator)
                # apply training along the way
                print("Step: %d Learning Rate: %.5f" %
                      (i + 1, session.run(m.lr)))
                train_loss = run_step(session,
                                      m,
                                      data_train,
                                      config,
                                      i,
                                      eval_op=m.train_op,
                                      summary_writer=train_writer,
                                      verbose=True)
                print("Step: %d training loss: %.5f" % (i + 1, train_loss))
                if i % FLAGS.perform_val_steps == 0:
                    data_val = next(val_data_generator)
                    valid_loss = run_step(session,
                                          mvalid,
                                          data_val,
                                          eval_config,
                                          i,
                                          summary_writer=val_writer,
                                          verbose=True)
                    print("Step: %d Valid loss: %.5f" % (i + 1, valid_loss))
                if (i % FLAGS.save_checkpoint_steps == 0) and FLAGS.save_path:
                    print("Saving model to %s." % FLAGS.save_path)
                    sv.saver.save(session,
                                  FLAGS.save_path,
                                  global_step=sv.global_step)
Example #7
        FLAGS.save_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/LSTM_east/" + now.strftime("%Y%m%d-%H%M%S")
        FLAGS.video_path = "/home/lxiaol9/ARC/EASTRNN/data/ICDAR2013/train/"
        FLAGS.checkpoints_path = FLAGS.save_path
        FLAGS.pretrained_model_path = "/work/cascades/lxiaol9/ARC/EAST/checkpoints/east/20180921-173054/"
    print("############## Step1: The environment path has been set up ###############")
    gpus = [x.name for x in device_lib.list_local_devices() if x.device_type == "GPU"]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))
    # Model parameters using config
    config = get_config(FLAGS)
    config.batch_size = 8
    config.num_layers = 3
    config.num_steps = 10
    eval_config = get_config(FLAGS)
    eval_config.batch_size = 8
    eval_config.num_layers = 3
    eval_config.num_steps = 10
    with tf.Graph().as_default():
        # Global initializer for Variables in the model
        # log: May 3rd, we need to adapt the model input, with config
        with tf.name_scope("Train"):
            # use placeholder to stand for input and targets
            initializer = tf.random_normal_initializer()
            x_train = tf.placeholder(tf.float32, shape=[None, config.num_steps, None, None, 3])
            m = ArrayModel(True, config, x_train, reuse_variables=None, initializer=initializer)