Example #1
import numpy as np

from my_config import MyConfig as cfg

def compute_anchors(angle):
    """
    compute angle offset and which bin the angle lies in
    input: fixed local orientation [0, 2pi]
    output: [bin number, angle offset]

    For two bins:

    if angle < pi, l = 0, r = 1
        if    angle < 1.65, return [0, angle]
        elif  pi - angle < 1.65, return [1, angle - pi]

    if angle > pi, l = 1, r = 2
        if    angle - pi < 1.65, return [1, angle - pi]
      elif     2pi - angle < 1.65, return [0, angle - 2pi]
    """
    anchors = []

    wedge = 2. * np.pi / cfg().bin  # bin width: 2pi / 2 = pi for two bins
    l_index = int(angle / wedge)  # index of the bin whose left edge lies below angle
    r_index = l_index + 1

    # (angle - l_index * pi) < pi/2 * 1.05 ~= 1.65
    if (angle - l_index * wedge) < wedge / 2 * (1 + cfg().overlap / 2):
        anchors.append([l_index, angle - l_index * wedge])

    # (r_index * pi - angle) < pi/2 * 1.05 ~= 1.65
    if (r_index * wedge - angle) < wedge / 2 * (1 + cfg().overlap / 2):
        anchors.append([r_index % cfg().bin, angle - r_index * wedge])

    return anchors
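
A quick sanity check of the bin/overlap behaviour, assuming cfg().bin = 2 and cfg().overlap = 0.1 (so the threshold is pi/2 * 1.05 ~= 1.65); these config values are an assumption:

print(compute_anchors(0.3))  # [[0, 0.3]]               -- well inside bin 0
print(compute_anchors(3.0))  # [[1, ~-0.14]]            -- well inside bin 1
print(compute_anchors(1.6))  # [[0, 1.6], [1, ~-1.54]]  -- overlap region, both bins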
Example #2
def orientation_confidence_flip(image_data, dims_avg):
    for data in image_data:

        # subtract the per-class average dimensions so the network regresses residuals
        data['dims'] = data['dims'] - dims_avg[data['name']]

        # orientation and confidence targets for the un-flipped image
        orientation = np.zeros((cfg().bin, 2))
        confidence = np.zeros(cfg().bin)

        anchors = compute_anchors(data['new_alpha'])

        for anchor in anchors:
            # each anchor angle is encoded as a (cos, sin) pair
            orientation[anchor[0]] = np.array(
                [np.cos(anchor[1]), np.sin(anchor[1])])
            confidence[anchor[0]] = 1

        confidence = confidence / np.sum(confidence)

        data['orient'] = orientation
        data['conf'] = confidence

        # orientation and confidence targets for the horizontally flipped copy
        orientation = np.zeros((cfg().bin, 2))
        confidence = np.zeros(cfg().bin)

        # a horizontal flip mirrors the local angle: alpha -> 2pi - alpha
        anchors = compute_anchors(2. * np.pi - data['new_alpha'])

        for anchor in anchors:
            orientation[anchor[0]] = np.array(
                [np.cos(anchor[1]), np.sin(anchor[1])])
            confidence[anchor[0]] = 1

        confidence = confidence / np.sum(confidence)

        data['orient_flipped'] = orientation
        data['conf_flipped'] = confidence

    return image_data
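
As a concrete walk-through, assuming cfg().bin = 2 and a sample object with new_alpha = 0.3 (a made-up value), the targets come out mirrored between the original and the flipped copy:

# anchors         = compute_anchors(0.3)        -> [[0, 0.3]]
# orient          = [[cos 0.3, sin 0.3], [0, 0]],   conf         = [1., 0.]
# anchors_flipped = compute_anchors(2pi - 0.3)  -> [[0, -0.3]]
# orient_flipped  = [[cos 0.3, -sin 0.3], [0, 0]],  conf_flipped = [1., 0.]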
Example #3
def network():
    inputs = layers.Input(shape=(cfg().norm_h, cfg().norm_w, 3))

    x = _conv_block(inputs, 32, (3, 3), strides=(2, 2))

    x = _inverted_residual_block(x, 16, (3, 3), t=1, strides=1, n=1)
    x = _inverted_residual_block(x, 24, (3, 3), t=6, strides=2, n=2)
    x = _inverted_residual_block(x, 32, (3, 3), t=6, strides=2, n=3)
    x = _inverted_residual_block(x, 64, (3, 3), t=6, strides=2, n=4)
    x = _inverted_residual_block(x, 96, (3, 3), t=6, strides=1, n=3)
    x = _inverted_residual_block(x, 160, (3, 3), t=6, strides=2, n=3)
    x = _inverted_residual_block(x, 320, (3, 3), t=6, strides=1, n=1)

    x = _conv_block(x, 1280, (1, 1), strides=(1, 1))
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Reshape((1, 1, 1280))(x)
    x = layers.Dropout(0.3, name='Dropout')(x)

    # Dimensions branch
    dimensions = layers.Conv2D(3, (1, 1), padding='same', name='d_conv')(x)
    dimensions = layers.Reshape((3, ), name='dimensions')(dimensions)

    # Orientation branch: a (cos, sin) pair per bin
    orientation = layers.Conv2D(cfg().bin * 2, (1, 1),
                                padding='same',
                                name='o_conv')(x)
    orientation = layers.Reshape((cfg().bin, -1))(orientation)
    orientation = layers.Lambda(l2_normalize, name='orientation')(orientation)

    # Confidence branch: one score per bin
    confidence = layers.Conv2D(cfg().bin, (1, 1),
                               padding='same',
                               name='c_conv')(x)
    confidence = layers.Activation('softmax', name='softmax')(confidence)
    confidence = layers.Reshape((cfg().bin, ), name='confidence')(confidence)

    # Build model
    model = tf.keras.Model(inputs, [dimensions, orientation, confidence])
    model.summary()

    return model
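
A minimal sketch of compiling the three heads for training; the optimizer, losses and weights below are assumptions, not the repo's exact settings (the orientation head is normally trained with a custom cos/sin similarity loss, for which mean squared error is only a stand-in):

model = network()
model.compile(optimizer='adam',
              loss={'dimensions': 'mean_squared_error',
                    'orientation': 'mean_squared_error',  # stand-in for a custom angle loss
                    'confidence': 'categorical_crossentropy'},
              loss_weights={'dimensions': 1., 'orientation': 1., 'confidence': 1.})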
Example #4
def data_gen(all_objs):
    '''
    generate training batches
    input: all_objs -- all objects used for training
           (the batch size is taken from cfg().batch_size)
    yield: x_batch -- (batch_size, norm_h, norm_w, 3), input image patches
           d_batch -- (batch_size, 3), object dimension residuals
           o_batch -- (batch_size, bin, 2), object orientation as (cos, sin) per bin
           c_batch -- (batch_size, bin), bin confidence
    '''
    num_obj = len(all_objs)

    keys = list(range(num_obj))
    np.random.shuffle(keys)

    l_bound = 0
    r_bound = cfg().batch_size if cfg().batch_size < num_obj else num_obj

    while True:
        if l_bound == r_bound:
            l_bound = 0
            r_bound = cfg().batch_size if cfg().batch_size < num_obj else num_obj
            np.random.shuffle(keys)

        currt_inst = 0
        x_batch = np.zeros((r_bound - l_bound, cfg().norm_h, cfg().norm_w, 3))
        d_batch = np.zeros((r_bound - l_bound, 3))
        o_batch = np.zeros((r_bound - l_bound, cfg().bin, 2))
        c_batch = np.zeros((r_bound - l_bound, cfg().bin))

        for key in keys[l_bound:r_bound]:
            # augment input image and fix object's orientation and confidence
            image, dimension, orientation, confidence = prepare_input_and_output(
                all_objs[key], all_objs[key]['image'])

            x_batch[currt_inst, :] = image
            d_batch[currt_inst, :] = dimension
            o_batch[currt_inst, :] = orientation
            c_batch[currt_inst, :] = confidence

            currt_inst += 1

        yield x_batch, [d_batch, o_batch, c_batch]

        l_bound = r_bound
        r_bound = r_bound + cfg().batch_size

        if r_bound > num_obj:
            r_bound = num_obj
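
Given a compiled model, the generator plugs straight into Keras; train_objs and the epoch count here are placeholders:

train_gen = data_gen(train_objs)
model.fit(train_gen,
          steps_per_epoch=len(train_objs) // cfg().batch_size,
          epochs=50)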
Example #5
def predict(args):
    # compile the model and load trained weights
    model = nn.network()
    model.load_weights('3dbox_weights_mob.hdf5')
    # model.load_weights(args.w)

    # KITTI_train_gen = KITTILoader(subset='training')
    dims_avg, _ = KITTILoader(subset='tracklet').get_average_dimension()

    # list all the validation images
    if args.a == 'training':
        all_imgs = sorted(os.listdir(test_image_dir))
        val_index = int(len(all_imgs) * cfg().split)
        val_imgs = all_imgs[val_index:]

    else:
        val_imgs = sorted(os.listdir(test_image_dir))

    start_time = time.time()

    for i in val_imgs:
        image_file = test_image_dir + i
        label_file = test_label_dir + i.replace('png', 'txt')
        prediction_file = prediction_path + i.replace('png', 'txt')
        calibration_file = test_calib_path + i.replace('png', 'txt')

        # write the prediction file
        with open(prediction_file, 'w') as pred_out:
            img = cv2.imread(image_file)
            img = np.array(img, dtype='float32')
            P2 = np.array([])
            for line in open(calibration_file):
                if 'P2' in line:
                    P2 = line.split(' ')
                    P2 = np.asarray([float(i) for i in P2[1:]])
                    P2 = np.reshape(P2, (3, 4))

            for line in open(label_file):
                line = line.strip().split(' ')
                obj = detectionInfo(line)
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)
                if obj.name in cfg().KITTI_cat:
                    # cropped 2d bounding box
                    if xmin == xmax or ymin == ymax:
                        continue
                    # crop the 2D detection area
                    patch = img[ymin:ymax, xmin:xmax]
                    # cv2.resize expects (width, height); harmless here since norm_h == norm_w
                    patch = cv2.resize(patch, (cfg().norm_h, cfg().norm_w))
                    # patch -= np.array([[[103.939, 116.779, 123.68]]])
                    patch /= 255.0
                    # extend it to match the training dimension
                    patch = np.expand_dims(patch, 0)

                    prediction = model.predict(patch)

                    dim = prediction[0][0]
                    bin_anchor = prediction[1][0]
                    bin_confidence = prediction[2][0]

                    # update with the predicted dimension residuals
                    dims = dims_avg[obj.name] + dim
                    obj.h, obj.w, obj.l = np.array(
                        [round(d, 2) for d in dims])

                    # update with predicted alpha, [-pi, pi]
                    obj.alpha = recover_angle(bin_anchor, bin_confidence,
                                              cfg().bin)

                    # compute global and local orientation
                    obj.rot_global, rot_local = compute_orientaion(P2, obj)

                    # compute and update translation, (x, y, z)
                    obj.tx, obj.ty, obj.tz = translation_constraints(
                        P2, obj, rot_local)

                    # output prediction label
                    output_line = obj.member_to_list()
                    output_line.append(1.0)
                    # Write regressed 3D dim and orientation to file
                    output_line = ' '.join([str(item)
                                            for item in output_line]) + '\n'
                    pred_out.write(output_line)
                    print('Wrote predicted labels for: ' + str(i))
    end_time = time.time()
    process_time = (end_time - start_time) / len(val_imgs)
    print('Average processing time per image: %.4fs' % process_time)
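
recover_angle is imported from the project's utils; below is a minimal sketch of the standard MultiBin decoding it presumably performs, assuming bin centers at i * 2pi / bin_num (the name recover_angle_sketch is hypothetical):

def recover_angle_sketch(bin_anchor, bin_confidence, bin_num):
    # take the most confident bin
    max_anc = np.argmax(bin_confidence)
    # offset inside the bin from its (cos, sin) pair
    angle_offset = np.arctan2(bin_anchor[max_anc][1], bin_anchor[max_anc][0])
    # add the bin center and wrap into [-pi, pi]
    angle = angle_offset + max_anc * (2. * np.pi / bin_num)
    return (angle + np.pi) % (2. * np.pi) - np.pi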
Example #6
import os
import numpy as np
import cv2
import argparse
from utils.read_dir import ReadDir
from data_processing.KITTI_dataloader import KITTILoader
from utils.correspondece_constraint import *

import time

from my_config import MyConfig as cfg

if cfg().network == 'vgg16':
    from model import vgg16 as nn
if cfg().network == 'mobilenet_v2':
    from model import mobilenet_v2 as nn


Example #7
def prepare_input_and_output(train_inst, image_dir):
    '''
    prepare an image patch for training
    input:  train_inst -- annotation dict for one object
            image_dir  -- path to the source image
    output: img -- cropped, resized bbox patch
            train_inst['dims'] -- object dimension residuals
            train_inst['orient'] or ['orient_flipped'] -- orientation targets
            train_inst['conf'] or ['conf_flipped'] -- confidence targets
    '''
    xmin = train_inst['xmin']  + np.random.randint(-cfg().jit, cfg().jit+1)
    ymin = train_inst['ymin']  + np.random.randint(-cfg().jit, cfg().jit+1)
    xmax = train_inst['xmax']  + np.random.randint(-cfg().jit, cfg().jit+1)
    ymax = train_inst['ymax']  + np.random.randint(-cfg().jit, cfg().jit+1)

    img = cv2.imread(image_dir)

    if cfg().jit != 0:
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, img.shape[1] - 1)
        ymax = min(ymax, img.shape[0] - 1)

    # crop the (jittered) box; astype returns a fresh float copy
    img = img[ymin:ymax + 1, xmin:xmax + 1].astype(np.float32)

    # randomly mirror the image: the binomial draw gives 1 or 0, each with p = 0.5
    flip = np.random.binomial(1, .5)
    if flip > 0.5:
        img = cv2.flip(img, 1)

    # resize to the network input size
    # (cv2.resize expects (width, height); harmless here since norm_h == norm_w)
    img = cv2.resize(img, (cfg().norm_h, cfg().norm_w))
    # img = img - np.array([[[103.939, 116.779, 123.68]]])  # per-channel mean subtraction (disabled)
    img /= 255.0

    ### return the orientation/confidence targets that match the flip
    if flip > 0.5:
        return img, train_inst['dims'], train_inst['orient_flipped'], train_inst['conf_flipped']
    else:
        return img, train_inst['dims'], train_inst['orient'], train_inst['conf']
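
A hypothetical call, with an annotation dict shaped like the ones produced by orientation_confidence_flip (every value below is made up, and the image path must exist):

inst = {'xmin': 100, 'ymin': 120, 'xmax': 220, 'ymax': 260,
        'dims': np.zeros(3),
        'orient': np.zeros((2, 2)), 'conf': np.array([1., 0.]),
        'orient_flipped': np.zeros((2, 2)), 'conf_flipped': np.array([1., 0.])}
img, dims, orient, conf = prepare_input_and_output(inst, 'path/to/image.png')
# img: (norm_h, norm_w, 3) float patch scaled to [0, 1]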
Example #8
def network():

    inputs = layers.Input(shape=(cfg().norm_h, cfg().norm_w, 3))

    # Block 1
    x = layers.Conv2D(64, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block1_conv1')(inputs)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(64, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block1_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block2_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(128, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block2_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block3_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(256, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block3_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(256, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block3_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block4_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block4_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block4_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block5_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block5_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3),
                      padding='same',
                      kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4),
                      name='block5_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block5_pool')(x)

    # layers.Flatten
    x = layers.Flatten(name='Flatten')(x)

    # Dimensions branch
    dimensions = layers.Dense(512, name='d_fc_1')(x)
    dimensions = layers.LeakyReLU(alpha=0.1)(dimensions)
    dimensions = layers.Dropout(0.5)(dimensions)
    dimensions = layers.Dense(3, name='d_fc_2')(dimensions)
    dimensions = layers.LeakyReLU(alpha=0.1, name='dimensions')(dimensions)

    # Orientation branch
    orientation = layers.Dense(256, name='o_fc_1')(x)
    orientation = layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = layers.Dropout(0.5)(orientation)
    orientation = layers.Dense(cfg().bin * 2, name='o_fc_2')(orientation)
    orientation = layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = layers.Reshape((cfg().bin, -1))(orientation)
    orientation = layers.Lambda(l2_normalize, name='orientation')(orientation)

    # Confidence branch
    confidence = layers.Dense(256, name='c_fc_1')(x)
    confidence = layers.LeakyReLU(alpha=0.1)(confidence)
    confidence = layers.Dropout(0.5)(confidence)
    confidence = layers.Dense(cfg().bin,
                              activation='softmax',
                              name='confidence')(confidence)

    # Build model
    model = tf.keras.Model(inputs, [dimensions, orientation, confidence])
    model.summary()

    return model
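
Both network variants normalize the orientation head with an l2_normalize helper imported elsewhere in the project; here is a minimal sketch of what it presumably does, scaling each (cos, sin) pair to unit length so it encodes a valid angle:

import tensorflow as tf

def l2_normalize(x):
    # x has shape (batch, bin, 2); normalize each (cos, sin) pair along the last axis
    return tf.nn.l2_normalize(x, axis=2)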