def _get_test_dataset(self):
        """
        Build the batched test input pipeline from the TFRecord files on disk.

        The ``*.tfrecords`` pattern under ``self._test_out_dir`` is first
        checked with ``glob`` (an ``AssertionError`` is raised when nothing
        matches). The files are then interleaved into one record stream,
        decoded with ``self.decode``, batched with ``self._hparams.batch_size``
        (the final partial batch is kept) and prefetched. No explicit
        ``shuffle`` or ``repeat`` stage is added here.

        :return: the assembled ``tf.data`` dataset
        """
        pattern = os.path.join(self._test_out_dir,
                               "*.tfrecords").replace("//", "/")

        # Fail early if the pattern matches nothing on disk.
        matches = glob.glob(pathname=pattern)
        assert len(matches) > 0

        batches = (
            tf.data.Dataset.list_files(pattern)
            # Read several TFRecord files concurrently.
            .interleave(tf.data.TFRecordDataset,
                        cycle_length=self._num_cores,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)
            # Turn each serialized record into whatever self.decode returns.
            .map(self.decode)
            .batch(batch_size=self._hparams.batch_size, drop_remainder=False)
            .prefetch(self._prefetch_size)
        )

        print_info("Dataset output sizes are: ")
        print_info(batches)
        return batches
    def _prepare_val_dataset(self):
        """
        Assemble the validation input pipeline and store it on the instance.

        TFRecord files matching ``*.tfrecords`` under ``self._val_out_dir``
        are interleaved, shuffled (buffer of ``10 * self._batch_size``, seed
        42), decoded with ``self.decode``, batched (final partial batch kept)
        and prefetched.

        :return: None; the pipeline is written to ``self._val_dataset``
        """
        print_info("_get_val_dataset")
        memory_usage_psutil()

        pattern = os.path.join(self._val_out_dir,
                               "*.tfrecords").replace("//", "/")
        autotune = tf.data.experimental.AUTOTUNE

        pipeline = tf.data.Dataset.list_files(pattern)
        # Read several TFRecord files concurrently.
        pipeline = pipeline.interleave(tf.data.TFRecordDataset,
                                       cycle_length=self._num_cores,
                                       num_parallel_calls=autotune)
        pipeline = pipeline.shuffle(self._batch_size * 10, 42)
        # Turn each serialized record into whatever self.decode returns.
        pipeline = pipeline.map(map_func=self.decode,
                                num_parallel_calls=autotune)
        pipeline = pipeline.batch(batch_size=self._batch_size,
                                  drop_remainder=False)
        pipeline = pipeline.prefetch(buffer_size=autotune)

        self._val_dataset = pipeline
    def _get_val_dataset(self):
        """
        Build the validation pipeline, log it, and return it.

        Delegates construction to ``_prepare_val_dataset`` (which stores the
        result in ``self._val_dataset``) and reports memory usage afterwards.

        :return: the dataset held in ``self._val_dataset``
        """
        self._prepare_val_dataset()
        val_dataset = self._val_dataset

        print_info("Dataset output sizes are: ")
        print_info(val_dataset)
        memory_usage_psutil()

        return val_dataset
 def get_number_steps_per_epcoh(self, num_train_examples):
     """
     Compute and log how many full batches fit into one epoch.

     :param num_train_examples: number of training examples in an epoch
     :return: ``num_train_examples // self._batch_size``
     """
     steps_per_epoch = num_train_examples // self._batch_size

     spacer = "\n\n\n\n\n"
     report = (
         ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",
         f"Number of examples per epoch is {num_train_examples}",
         f"Batch size is {self._batch_size}",
         f"Number of steps per epoch is {steps_per_epoch}",
         "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
     )

     # Blank lines around the summary keep it visible in noisy logs.
     print(spacer)
     for line in report:
         print_info(line)
     print(spacer)

     return steps_per_epoch
示例#5
0
def main(args):
    """Generate dummy TFRecord data, then profile dataset iteration or training.

    Keys read from ``args``:
        "dataset": "east" (image data, ``EASTTFModel``) or "numpy" (array
            data, ``NNet``). Any other value logs an error and returns
            without generating data or training.
        "delete": when equal to ``True``, existing train/val data directories
            are removed before the data is regenerated.
        "num_tfrecord_files": number of files, forwarded to ``gen_data``.
        "mode": "test_iterator" only iterates the datasets and returns; any
            other value runs train/evaluate and exports the model.

    Object growth (``objgraph``) and process memory are reported around each
    phase.
    """

    def report_object_growth():
        print('objgraph growth list start')
        objgraph.show_growth(limit=50)
        print('objgraph growth list end')

    dataset_name = args["dataset"]
    if dataset_name not in ("east", "numpy"):
        # Previously execution fell through and silently ran the "east"
        # configuration after logging the error.
        print_error("Invalid dataset")
        return

    # TODO add into argparser
    data_root = os.getcwd() + "/data"
    if dataset_name == "numpy":
        IS_EAST_IMAGE_TEST = False
        BATCH_SIZE = 128
        TRAIN_DATA = data_root + "/train_data"
        VAL_DATA = data_root + "/val_data"
        MODEL_DIR = data_root + "/fwd_nnet"
        NUM_SAMPLES_PER_FILE = 10000  # arrays per file
    else:
        IS_EAST_IMAGE_TEST = True
        BATCH_SIZE = 4
        TRAIN_DATA = data_root + "/train_data_img"
        VAL_DATA = data_root + "/val_data_img"
        MODEL_DIR = data_root + "/east_net"
        NUM_SAMPLES_PER_FILE = 8  # images per file
    EXPORT_DIR = MODEL_DIR + "/export"
    NUM_EPOCHS = 3

    # TODO the decode function needs this value inside the dataset map
    # function, hence it is hard-coded for now; if it changes, also update
    # `numpy_array_decode` in dummy_dataset.py manually.
    NUM_FEATURES = 250

    # NOTE(review): true division yields a float (e.g. 10000 / 128 = 78.125);
    # confirm `_init_tf_config` accepts a non-integer step count.
    TOTAL_STEPS_PER_FILE = NUM_SAMPLES_PER_FILE / BATCH_SIZE

    # Equality with True is kept on purpose: switching to plain truthiness
    # would change the outcome for non-bool values such as the string "False".
    if args["delete"] == True:  # noqa: E712
        print_info("Deleting old data files")
        for stale_dir in (TRAIN_DATA, VAL_DATA):
            try:
                shutil.rmtree(stale_dir)
            except FileNotFoundError:
                # Nothing to delete on a first run.
                pass

    gen_data(IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
             TRAIN_DATA=TRAIN_DATA,
             VAL_DATA=VAL_DATA,
             NUM_SAMPLES_PER_FILE=NUM_SAMPLES_PER_FILE,
             NUM_FEATURES=NUM_FEATURES,
             number_files=int(args["num_tfrecord_files"]))

    if args["mode"] == "test_iterator":
        report_object_growth()
        # The training data is iterated twice, then the validation data,
        # presumably to expose growth across repeated passes - TODO confirm.
        for data_path in (TRAIN_DATA, TRAIN_DATA, VAL_DATA):
            test_dataset(data_path=data_path,
                         BATCH_SIZE=BATCH_SIZE,
                         IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST)
        report_object_growth()
        return

    if IS_EAST_IMAGE_TEST:
        model = EASTTFModel(model_root_directory="store")
    else:
        model = NNet()

    estimator = tf.estimator.Estimator(
        model_fn=model,
        config=_init_tf_config(TOTAL_STEPS_PER_FILE=TOTAL_STEPS_PER_FILE,
                               MODEL_DIR=MODEL_DIR),
        params=None)
    memory_usage_psutil()
    report_object_growth()

    print("\n\n\n\n\n\n")
    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch")
    memory_usage_psutil()
    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Training")

    train_n_evaluate(estimator=estimator,
                     TRAIN_DATA=TRAIN_DATA,
                     VAL_DATA=VAL_DATA,
                     BATCH_SIZE=BATCH_SIZE,
                     IS_EAST_IMAGE_TEST=IS_EAST_IMAGE_TEST,
                     max_steps=None,
                     NUM_EPOCHS=NUM_EPOCHS)

    report_object_growth()
    memory_usage_psutil()

    print_error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> New Epoch")
    export_model(estimator=estimator,
                 model_export_path=EXPORT_DIR,
                 IS_EAST_MODEL=IS_EAST_IMAGE_TEST)

    # NOTE(review): the result is discarded, as in the original; it was
    # probably meant to be printed or inspected.
    objgraph.get_leaking_objects()
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    """Residual block whose shortcut path goes through its own conv layer.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path
            conv layers; the last one is also used for the shortcut conv
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: strides shared by the first main-path conv layer and the
            shortcut conv layer

    # Returns
        Output tensor for the block.

    Every intermediate tensor is logged with `print_info`.
    """
    print_warn(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> conv block")

    n_reduce, n_middle, n_expand = filters
    channel_axis = 3
    branch_tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + branch_tag
    bn_prefix = 'bn' + branch_tag

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # Conv2D followed by BatchNormalization, logging both outputs.
        out = layers.Conv2D(n_filters,
                            kernel,
                            kernel_initializer='he_normal',
                            name=conv_prefix + suffix,
                            **conv_kwargs)(tensor)
        print_info(out)
        out = layers.BatchNormalization(axis=channel_axis,
                                        name=bn_prefix + suffix)(out)
        print_info(out)
        return out

    def relu(tensor):
        out = layers.Activation('relu')(tensor)
        print_info(out)
        return out

    print_info(input_tensor)

    # Main path: 1x1 (strided) -> kernel_size ('same' padding) -> 1x1.
    main_path = relu(conv_bn(input_tensor, n_reduce, (1, 1), '2a',
                             strides=strides))
    main_path = relu(conv_bn(main_path, n_middle, kernel_size, '2b',
                             padding='same'))
    main_path = conv_bn(main_path, n_expand, (1, 1), '2c')

    # Shortcut path: strided 1x1 conv so its output can be added to the
    # main path.
    shortcut = conv_bn(input_tensor, n_expand, (1, 1), '1', strides=strides)

    merged = layers.add([main_path, shortcut])
    print_info(merged)
    return relu(merged)
def model(images, text_scale=512, weight_decay=1e-5, is_training=True):
    """
    Define the detection network on top of a Keras ResNet-style backbone.

    The input goes through `mean_image_subtraction`, a 7x7/stride-2 stem and
    residual stages 2-5 (3, 4, 6 and 3 blocks). The output of each stage is
    kept and merged from deepest to shallowest (concat -> 1x1 conv -> 3x3
    conv -> `unpool`) to produce the final feature map `g[3]`.

    # Arguments
        images: input image tensor; `bn_axis = 3` implies channels on the
            last axis (presumably NHWC - TODO confirm)
        text_scale, weight_decay, is_training: accepted but not referenced
            anywhere in this function body

    # Returns
        (F_score, F_geometry): a 1-channel sigmoid map, and the last-axis
        concatenation of a 4-channel sigmoid map with a 1-channel sigmoid map.

    NOTE(review): the commented-out slim reference lines below multiply the
    geometry map by `FLAGS.text_scale` and map the angle to
    `(sigmoid - 0.5) * pi / 2`; this version returns the raw sigmoid outputs
    and never uses `text_scale`. Confirm that the scaling is applied
    elsewhere or was dropped on purpose.
    """
    images = mean_image_subtraction(images)

    bn_axis = 3

    # Stage outputs, keyed "pool2" .. "pool5".
    end_points = dict()

    print_warn(">>>>>>>>>>>>>>> Model Definition Started: ")
    print_warn(images)
    # Stem: pad -> 7x7 conv (stride 2) -> BN -> ReLU -> pad -> 3x3 max pool.
    # http://ethereon.github.io/netscope/#/gist/db945b393d40bfa26006
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(images)
    print_warn(x)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    print_warn(x)
    x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    print_warn(x)
    x = layers.Activation('relu')(x)
    print_warn(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    print_warn(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    print_warn(x)

    print_warn(">>>>>>>>>>>>>>> Resnet Definition Started: ")
    print_warn(">>>>> pool2")
    # Stage 2 is the only stage whose conv_block uses strides=(1, 1).
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    print_warn(x)
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    print_warn(x)
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    print_warn(x)

    end_points["pool2"] = x

    print_warn(">>>>> pool3")
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    print_warn(x)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    print_warn(x)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    print_warn(x)
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    print_warn(x)
    end_points["pool3"] = x

    print_warn(">>>>> pool4")

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    print_warn(x)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    print_warn(x)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    print_warn(x)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    print_warn(x)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    print_warn(x)
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    print_info(x)
    end_points["pool4"] = x

    print_warn(">>>>> pool5")
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    print_warn(x)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    print_warn(x)
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    print_warn(x)

    end_points["pool5"] = x

    # Stage outputs ordered from the deepest (pool5) to the shallowest (pool2).
    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    for i in range(4):
        logging.info('Shape of f_{} : {}'.format(i, f[i].shape))

    # h[i]: merged features at level i; g[i]: h[i] after `unpool` (levels
    # 0-2) or after a final 3x3 conv (level 3).
    g = [None, None, None, None]
    h = [None, None, None, None]
    # Filter counts per level; level 0 has no conv, hence None.
    num_outputs = [None, 128, 64, 32]

    for i in range(4):
        if i == 0:
            # The deepest level has nothing to merge with.
            h[i] = f[i]
        else:
            # Concatenate the previous level's output with this stage's
            # features on the channel axis, then 1x1 conv and 3x3 conv.
            c1_1 = layers.Conv2D(filters=num_outputs[i],
                                 kernel_size=1)(tf.concat([g[i - 1], f[i]],
                                                          axis=-1))
            # slim reference: slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
            h[i] = layers.Conv2D(filters=num_outputs[i],
                                 kernel_size=3,
                                 padding="same")(c1_1)  #TODO kernel size to 3
            # slim reference: slim.conv2d(c1_1, num_outputs[i], 3)
        if i <= 2:
            g[i] = unpool(h[i])
        else:
            g[i] = layers.Conv2D(filters=num_outputs[i],
                                 kernel_size=3,
                                 padding="same")(h[i])  #TODO kernel size to 3
            # slim reference: slim.conv2d(h[i], num_outputs[i], 3)
        logging.info('Shape of h_{} : {}, g_{} : {}'.format(
            i, h[i].shape, i, g[i].shape))

    # Here we use a slightly different way for the regression part:
    # we first use a sigmoid to limit the regression range, and
    # the same is done for the angle map.
    F_score = layers.Conv2D(filters=1, kernel_size=1,
                            activation=tf.nn.sigmoid)(g[3])
    # slim reference: slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
    # 4 channels of axis-aligned bbox and 1 channel of rotation angle
    geo_map = layers.Conv2D(filters=4, kernel_size=1,
                            activation=tf.nn.sigmoid)(g[3])
    # slim reference: slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
    angle_map = layers.Conv2D(filters=1,
                              kernel_size=1,
                              activation=tf.nn.sigmoid)(g[3])
    # slim reference: (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
    F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer on the main path
        filters: three integers, the filter counts of the three main-path
            conv layers
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.

    Every intermediate tensor is logged with `print_info`.
    """
    print_warn(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> identity block")

    n_reduce, n_middle, n_expand = filters
    channel_axis = 3
    branch_tag = str(stage) + block + '_branch'
    conv_prefix = 'res' + branch_tag
    bn_prefix = 'bn' + branch_tag

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # Conv2D followed by BatchNormalization, logging both outputs.
        out = layers.Conv2D(n_filters,
                            kernel,
                            kernel_initializer='he_normal',
                            name=conv_prefix + suffix,
                            **conv_kwargs)(tensor)
        print_info(out)
        out = layers.BatchNormalization(axis=channel_axis,
                                        name=bn_prefix + suffix)(out)
        print_info(out)
        return out

    def relu(tensor):
        out = layers.Activation('relu')(tensor)
        print_info(out)
        return out

    print_info(input_tensor)

    # Main path: 1x1 -> kernel_size ('same' padding) -> 1x1.
    main_path = relu(conv_bn(input_tensor, n_reduce, (1, 1), '2a'))
    main_path = relu(conv_bn(main_path, n_middle, kernel_size, '2b',
                             padding='same'))
    main_path = conv_bn(main_path, n_expand, (1, 1), '2c')

    # The input itself is the shortcut; add it and apply the final ReLU.
    merged = layers.add([main_path, input_tensor])
    print_info(merged)
    return relu(merged)
def generator(data_path,
              geometry,
              min_crop_side_ratio,
              min_text_size,
              input_size=512,
              batch_size=4,
              background_ratio=3. / 8,
              random_scale=np.array([0.5, 1, 2.0, 3.0]),
              vis=False):
    """Endlessly yield augmented training batches for images in `data_path`.

    Each pass shuffles the image list. Every image is randomly rescaled,
    cropped with `crop_area`, zero-padded to a square and resized to
    `input_size` x `input_size`. An image that raises any exception is
    reported with a traceback and skipped. A partially filled batch is
    discarded when a pass over the image list ends.

    # Arguments
        data_path: directory handed to `get_images`
        geometry: only consulted for background samples; 'RBOX' gives a
            5-channel geometry map, anything else 8 channels
        min_crop_side_ratio: forwarded to `crop_area`
        min_text_size: forwarded to `generate_rbox`
        input_size: side length of the square network input
        batch_size: number of samples per yielded batch
        background_ratio: probability of taking the background branch
        random_scale: candidate scale factors; one is drawn per image
        vis: when true, every sample is plotted with matplotlib

    # Yields
        `(features, labels)`: `features` is a dict with "images",
        "score_maps" and "geo_maps" (maps subsampled by 4 on both spatial
        axes); `labels` is the image batch again. Training masks are
        computed but not part of the yielded value.

    # Raises
        ValueError: if `get_images` finds no images. Without this check the
            outer loop would spin forever and never yield.
    """
    image_list = np.array(get_images(data_path))
    print_info('{} training images in {}'.format(image_list.shape[0],
                                                 data_path))
    if image_list.shape[0] == 0:
        raise ValueError(
            'no training images found in {}'.format(data_path))

    def pad_to_square(image):
        # Zero-pad (top-left aligned) to a square at least input_size wide.
        height, width, _ = image.shape
        side = np.max([height, width, input_size])
        padded = np.zeros((side, side, 3), dtype=np.uint8)
        padded[:height, :width, :] = image
        return padded

    index = np.arange(0, image_list.shape[0])
    while True:
        np.random.shuffle(index)
        images = []
        score_maps = []
        geo_maps = []
        training_masks = []
        for i in index:
            try:
                im_fn = image_list[i]
                im = cv2.imread(im_fn)
                h, w, _ = im.shape
                # The annotation file sits next to the image with a .txt
                # extension. splitext only touches the final extension;
                # the previous str.replace on split('.')[1] rewrote every
                # occurrence of that token anywhere in the path.
                txt_fn = os.path.splitext(im_fn)[0] + '.txt'
                print_info(
                    f"Imgae file name : {im_fn} and text file name {txt_fn}")
                if not os.path.exists(txt_fn):
                    print('text file {} does not exists'.format(txt_fn))
                    continue

                text_polys, text_tags = load_annoataion(txt_fn)

                text_polys, text_tags = check_and_validate_polys(
                    text_polys, text_tags, (h, w))

                # Randomly scale the image and its polygons together.
                rd_scale = np.random.choice(random_scale)
                im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
                text_polys *= rd_scale

                if np.random.rand() < background_ratio:
                    # Background sample: keep the crop only if it holds no
                    # text.
                    # NOTE(review): this branch passes crop_background=False
                    # although the reference call noted here used
                    # crop_background=True; with False most crops probably
                    # contain text and are skipped below - confirm against
                    # `crop_area`.
                    im, text_polys, text_tags = crop_area(
                        im=im,
                        polys=text_polys,
                        tags=text_tags,
                        min_crop_side_ratio=min_crop_side_ratio,
                        crop_background=False,
                        max_tries=50)
                    if text_polys.shape[0] > 0:
                        # cannot find background
                        continue
                    im = cv2.resize(pad_to_square(im),
                                    dsize=(input_size, input_size))
                    # Empty targets: no text anywhere, every pixel trained.
                    score_map = np.zeros((input_size, input_size),
                                         dtype=np.uint8)
                    geo_map_channels = 5 if geometry == 'RBOX' else 8
                    geo_map = np.zeros(
                        (input_size, input_size, geo_map_channels),
                        dtype=np.float32)
                    training_mask = np.ones((input_size, input_size),
                                            dtype=np.uint8)
                else:
                    # Text sample: the crop must contain at least one polygon.
                    im, text_polys, text_tags = crop_area(
                        im=im,
                        polys=text_polys,
                        tags=text_tags,
                        min_crop_side_ratio=min_crop_side_ratio,
                        crop_background=False,
                        max_tries=50)
                    if text_polys.shape[0] == 0:
                        continue

                    im_padded = pad_to_square(im)
                    im = cv2.resize(im_padded,
                                    dsize=(input_size, input_size))
                    # The padded image is square, so a single ratio rescales
                    # both polygon coordinates.
                    resize_ratio = input_size / float(im_padded.shape[0])
                    text_polys[:, :, 0] *= resize_ratio
                    text_polys[:, :, 1] *= resize_ratio
                    new_h, new_w, _ = im.shape
                    score_map, geo_map, training_mask = generate_rbox(
                        (new_h, new_w),
                        text_polys,
                        text_tags,
                        min_text_size=min_text_size)

                if vis:
                    _fig, axs = plt.subplots(3, 2, figsize=(20, 30))
                    # Top-left panel: the image with its text polygons.
                    axs[0, 0].imshow(im[:, :, ::-1])
                    axs[0, 0].set_xticks([])
                    axs[0, 0].set_yticks([])
                    for poly in text_polys:
                        poly_h = min(abs(poly[3, 1] - poly[0, 1]),
                                     abs(poly[2, 1] - poly[1, 1]))
                        poly_w = min(abs(poly[1, 0] - poly[0, 0]),
                                     abs(poly[2, 0] - poly[3, 0]))
                        axs[0, 0].add_artist(
                            Patches.Polygon(poly,
                                            facecolor='none',
                                            edgecolor='green',
                                            linewidth=2,
                                            linestyle='-',
                                            fill=True))
                        axs[0, 0].text(poly[0, 0],
                                       poly[0, 1],
                                       '{:.0f}-{:.0f}'.format(poly_h, poly_w),
                                       color='purple')
                    # Remaining panels: score map, the first three geometry
                    # channels and the training mask.
                    other_panels = (
                        (axs[0, 1], score_map),
                        (axs[1, 0], geo_map[:, :, 0]),
                        (axs[1, 1], geo_map[:, :, 1]),
                        (axs[2, 0], geo_map[:, :, 2]),
                        (axs[2, 1], training_mask),
                    )
                    for panel, layer in other_panels:
                        panel.imshow(layer)
                        panel.set_xticks([])
                        panel.set_yticks([])
                    plt.tight_layout()
                    plt.show()
                    plt.close()

                # Reverse the channel axis (OpenCV loads images as BGR) and
                # subsample the target maps by 4 on both spatial axes.
                images.append(im[:, :, ::-1].astype(np.float32))
                score_maps.append(score_map[::4, ::4,
                                            np.newaxis].astype(np.float32))
                geo_maps.append(geo_map[::4, ::4, :].astype(np.float32))
                training_masks.append(
                    training_mask[::4, ::4, np.newaxis].astype(np.float32))

                if len(images) == batch_size:
                    yield {
                        "images": np.array(images),
                        "score_maps": np.array(score_maps),
                        "geo_maps": np.array(geo_maps)
                    }, np.array(images)
                    images = []
                    score_maps = []
                    geo_maps = []
                    training_masks = []
            except Exception:
                # Best effort: report the broken sample and move on.
                import traceback
                traceback.print_exc()
                continue
示例#10
0
    def train(self, num_max_steps=None, num_epoch=None):
        """Run the training loop with periodic validation and checkpointing.

        Exactly one of ``num_max_steps`` and ``num_epoch`` must be given.
        With ``num_epoch`` the step budget is
        ``(len(self._dataset) // batch_size) * num_epoch``.

        Every ``self._validation_interval_steps`` steps (including step 0)
        the averaged loss is logged to
        ``./store/<experiment_name>/log_train.txt``, validation is run, and
        the best-accuracy and best-norm-ED models are stored. A snapshot is
        also stored every 100000 steps. The process exits through
        ``sys.exit()`` when the step counter equals
        ``self._max_train_steps``.

        Args:
            num_max_steps: total number of training steps to run.
            num_epoch: number of passes over the dataset to run.

        Raises:
            ValueError: if neither or both of the arguments are supplied.
                The previous ``assert (cond, msg)`` asserted a non-empty
                tuple and therefore never fired.
        """
        if (num_max_steps is None) == (num_epoch is None):
            raise ValueError("Use steps or epoch at a time")

        # presumably load_model also handles multi-GPU wrapping - confirm
        model = self.load_model(self._stored_model)
        model.train()

        num_samples = len(self._dataset)  # TODO replace the dataset with actual
        batch_size = self._dataset._batch_size
        num_steps_per_epoch = num_samples // batch_size

        total_num_steps = -1
        if num_epoch:
            total_num_steps = num_steps_per_epoch * num_epoch
        if num_max_steps:
            total_num_steps = num_max_steps

        # loss averager
        loss_avg = Averager()

        train_dataset = self._dataset.train_set()

        start_time = time.time()
        best_accuracy = -1
        best_norm_ed = 1e+6
        grad_clip = 5  # TODO make as a param
        store_dir = f"./store/{self._experiment_name}"

        step = 0
        while step < total_num_steps:
            print_info("Current step {}".format(step))
            # train part
            image_tensors, labels = train_dataset.get_batch()
            images = image_tensors.to(TorchExecutor.device)
            # NOTE(review): the cost is computed with self._model while the
            # gradients are zeroed/clipped on the loaded `model` - confirm
            # both refer to the same parameters.
            cost = self._model.get_cost(model=self._model,
                                        features=images,
                                        labels=labels)
            # NOTE(review): requested on every step; if get_optimizer builds
            # a new optimizer each call, its internal state is lost between
            # steps - confirm and hoist out of the loop if so.
            optimizer = self._model.get_optimizer(model=model)

            model.zero_grad()
            cost.backward()
            # gradient clipping with 5 (Default)
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()

            loss_avg.add(cost)

            # validation part
            if step % self._validation_interval_steps == 0:
                elapsed_time = time.time() - start_time
                train_log = (
                    f'[{step}/{self._max_train_steps}] '
                    f'Loss: {loss_avg.val():0.5f} '
                    f'elapsed_time: {elapsed_time:0.5f}')
                print(train_log)

                os.makedirs(store_dir, exist_ok=True)
                with open(f'{store_dir}/log_train.txt', 'a') as log:
                    log.write(train_log + '\n')
                    loss_avg.reset()

                    model.eval()
                    with torch.no_grad():
                        # Only the first three results are used; the rest are
                        # bound to throwaway names so the training `labels`
                        # are not clobbered.
                        (valid_loss, current_accuracy, current_norm_ed,
                         _preds, _gt_labels, _infer_time,
                         _length_of_data) = self.validation(model=model)
                    model.train()

                    valid_log = f'[{step}/{self._max_train_steps}] valid loss: {valid_loss:0.5f}'
                    valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ed:0.2f}'
                    print(valid_log)
                    log.write(valid_log + '\n')

                    # keep best accuracy model
                    if current_accuracy > best_accuracy:
                        best_accuracy = current_accuracy
                        self.store_model(file_name="best_accuracy.pth",
                                         model=model)
                    # keep best (lowest) normalized edit distance model
                    if current_norm_ed < best_norm_ed:
                        best_norm_ed = current_norm_ed
                        self.store_model(file_name="best_norm_ed.pth",
                                         model=model)

                    best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ed: {best_norm_ed:0.2f}'
                    print(best_model_log)
                    log.write(best_model_log + '\n')

            # save model per 1e+5 iter.
            if (step + 1) % 100000 == 0:
                self.store_model(file_name=f"iter_{step + 1}.pth",
                                 model=model)

            if step == self._max_train_steps:
                print('end the training')
                sys.exit()

            step += 1