def compute_anchors(angle): """ compute angle offset and which bin the angle lies in input: fixed local orientation [0, 2pi] output: [bin number, angle offset] For two bins: if angle < pi, l = 0, r = 1 if angle < 1.65, return [0, angle] elif pi - angle < 1.65, return [1, angle - pi] if angle > pi, l = 1, r = 2 if angle - pi < 1.65, return [1, angle - pi] elif 2pi - angle < 1.65, return [0, angle - 2pi] """ anchors = [] wedge = 2. * np.pi / cfg().bin # 2pi / bin = pi l_index = int(angle / wedge) # angle/pi r_index = l_index + 1 # (angle - l_index*pi) < pi/2 * 1.05 = 1.65 if (angle - l_index * wedge) < wedge / 2 * (1 + cfg().overlap / 2): anchors.append([l_index, angle - l_index * wedge]) # (r*pi + pi - angle) < pi/2 * 1.05 = 1.65 if (r_index * wedge - angle) < wedge / 2 * (1 + cfg().overlap / 2): anchors.append([r_index % cfg().bin, angle - r_index * wedge]) return anchors
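# A quick sanity check of the bin assignment and the overlap region. The
# expected outputs below assume cfg().bin == 2 and cfg().overlap == 0.1
# (typical MyConfig values; adjust for your own config).
if __name__ == '__main__':
    # well inside bin 0: a single anchor
    print(compute_anchors(0.5))   # [[0, 0.5]]
    # near the bin boundary: both bins fire, once with a positive offset
    # and once with a negative one (1.6 - pi ~= -1.54)
    print(compute_anchors(1.6))   # [[0, 1.6], [1, -1.5415...]]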
def orientation_confidence_flip(image_data, dims_avg):
    for data in image_data:
        # subtract the average dimensions
        data['dims'] = data['dims'] - dims_avg[data['name']]

        # fix orientation and confidence for no flip
        orientation = np.zeros((cfg().bin, 2))
        confidence = np.zeros(cfg().bin)

        anchors = compute_anchors(data['new_alpha'])

        for anchor in anchors:
            # each angle is represented by its cos and sin
            orientation[anchor[0]] = np.array([np.cos(anchor[1]), np.sin(anchor[1])])
            confidence[anchor[0]] = 1

        confidence = confidence / np.sum(confidence)

        data['orient'] = orientation
        data['conf'] = confidence

        # fix orientation and confidence for random flip
        orientation = np.zeros((cfg().bin, 2))
        confidence = np.zeros(cfg().bin)

        # compute orientation and bin for flipped images
        anchors = compute_anchors(2. * np.pi - data['new_alpha'])

        for anchor in anchors:
            orientation[anchor[0]] = np.array([np.cos(anchor[1]), np.sin(anchor[1])])
            confidence[anchor[0]] = 1

        confidence = confidence / np.sum(confidence)

        data['orient_flipped'] = orientation
        data['conf_flipped'] = confidence

    return image_data
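# Why the (cos, sin) pair: the offset inside a bin is regressed as a point
# on the unit circle, so it can be recovered unambiguously with arctan2.
# A minimal round-trip check with a hypothetical angle, assuming
# cfg().bin == 2:
alpha = 1.6  # local orientation in [0, 2pi]
for bin_id, offset in compute_anchors(alpha):
    cos_o, sin_o = np.cos(offset), np.sin(offset)
    wedge = 2. * np.pi / cfg().bin
    recovered = np.arctan2(sin_o, cos_o) + bin_id * wedge
    print(bin_id, recovered % (2. * np.pi))  # both bins recover ~1.6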
import tensorflow as tf
from tensorflow.keras import layers

from my_config import MyConfig as cfg

# _conv_block, _inverted_residual_block and l2_normalize are assumed to be
# defined elsewhere in this module.


def network():
    inputs = layers.Input(shape=(cfg().norm_h, cfg().norm_w, 3))

    # MobileNetV2 backbone
    x = _conv_block(inputs, 32, (3, 3), strides=(2, 2))
    x = _inverted_residual_block(x, 16, (3, 3), t=1, strides=1, n=1)
    x = _inverted_residual_block(x, 24, (3, 3), t=6, strides=2, n=2)
    x = _inverted_residual_block(x, 32, (3, 3), t=6, strides=2, n=3)
    x = _inverted_residual_block(x, 64, (3, 3), t=6, strides=2, n=4)
    x = _inverted_residual_block(x, 96, (3, 3), t=6, strides=1, n=3)
    x = _inverted_residual_block(x, 160, (3, 3), t=6, strides=2, n=3)
    x = _inverted_residual_block(x, 320, (3, 3), t=6, strides=1, n=1)
    x = _conv_block(x, 1280, (1, 1), strides=(1, 1))

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Reshape((1, 1, 1280))(x)
    x = layers.Dropout(0.3, name='Dropout')(x)

    # Dimensions branch
    dimensions = layers.Conv2D(3, (1, 1), padding='same', name='d_conv')(x)
    dimensions = layers.Reshape((3,), name='dimensions')(dimensions)

    # Orientation branch: (cos, sin) per bin, i.e. 2 * cfg().bin channels
    # (the original hardcoded 4 assumed bin == 2)
    orientation = layers.Conv2D(cfg().bin * 2, (1, 1), padding='same', name='o_conv')(x)
    orientation = layers.Reshape((cfg().bin, -1))(orientation)
    orientation = layers.Lambda(l2_normalize, name='orientation')(orientation)

    # Confidence branch: one score per bin
    confidence = layers.Conv2D(cfg().bin, (1, 1), padding='same', name='c_conv')(x)
    confidence = layers.Activation('softmax', name='softmax')(confidence)
    confidence = layers.Reshape((cfg().bin,), name='confidence')(confidence)

    # Build model
    model = tf.keras.Model(inputs, [dimensions, orientation, confidence])
    model.summary()
    return model
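# The orientation Lambda above wraps an l2_normalize helper that is not part
# of this excerpt. A minimal sketch of what it is assumed to do: scale each
# bin's (cos, sin) pair to unit length so it encodes a valid angle.
def l2_normalize(x):
    # x has shape (batch, bin, 2); normalize along the last axis
    return tf.nn.l2_normalize(x, axis=2)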
import numpy as np

from my_config import MyConfig as cfg


def data_gen(all_objs):
    '''
    Generate batches of data for training. The batch size is read from
    cfg().batch_size.
    input:
        all_objs -- all objects used for training
    yield:
        x_batch -- (batch_size, 224, 224, 3), input images for the batch
        d_batch -- (batch_size, 3), object dimensions
        o_batch -- (batch_size, bin, 2), object orientation as (cos, sin) per bin
        c_batch -- (batch_size, bin), bin confidence
    '''
    num_obj = len(all_objs)

    keys = list(range(num_obj))
    np.random.shuffle(keys)

    l_bound = 0
    r_bound = cfg().batch_size if cfg().batch_size < num_obj else num_obj

    while True:
        if l_bound == r_bound:
            # one pass over the data is done: rewind and reshuffle
            l_bound = 0
            r_bound = cfg().batch_size if cfg().batch_size < num_obj else num_obj
            np.random.shuffle(keys)

        currt_inst = 0
        x_batch = np.zeros((r_bound - l_bound, 224, 224, 3))
        d_batch = np.zeros((r_bound - l_bound, 3))
        o_batch = np.zeros((r_bound - l_bound, cfg().bin, 2))
        c_batch = np.zeros((r_bound - l_bound, cfg().bin))

        for key in keys[l_bound:r_bound]:
            # augment the input image and fix the object's orientation and confidence
            image, dimension, orientation, confidence = prepare_input_and_output(
                all_objs[key], all_objs[key]['image'])

            x_batch[currt_inst, :] = image
            d_batch[currt_inst, :] = dimension
            o_batch[currt_inst, :] = orientation
            c_batch[currt_inst, :] = confidence

            currt_inst += 1

        yield x_batch, [d_batch, o_batch, c_batch]

        l_bound = r_bound
        r_bound = r_bound + cfg().batch_size
        if r_bound > num_obj:
            r_bound = num_obj
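# A sketch of how the generator is typically wired into training. This is
# not the repository's training script: 'train_objs' is a hypothetical
# variable holding the loaded training objects, the optimizer and epoch
# count are placeholders, and orientation_loss is a MultiBin cosine loss
# (sketched after the VGG16 network further below).
model = network()
model.compile(optimizer='adam',
              loss={'dimensions': 'mean_squared_error',
                    'orientation': orientation_loss,
                    'confidence': 'categorical_crossentropy'})
model.fit(data_gen(train_objs),
          steps_per_epoch=len(train_objs) // cfg().batch_size,
          epochs=50)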
# test_image_dir, test_label_dir, prediction_path and test_calib_path are
# assumed to be defined at module scope (e.g. built via utils.read_dir.ReadDir).


def predict(args):
    # compile model
    model = nn.network()
    model.load_weights('3dbox_weights_mob.hdf5')
    # model.load_weights(args.w)

    # KITTI_train_gen = KITTILoader(subset='training')
    dims_avg, _ = KITTILoader(subset='tracklet').get_average_dimension()

    # list all the validation images
    if args.a == 'training':
        all_imgs = sorted(os.listdir(test_image_dir))
        val_index = int(len(all_imgs) * cfg().split)
        val_imgs = all_imgs[val_index:]
    else:
        val_imgs = sorted(os.listdir(test_image_dir))

    start_time = time.time()

    for img_name in val_imgs:
        image_file = test_image_dir + img_name
        label_file = test_label_dir + img_name.replace('png', 'txt')
        prediction_file = prediction_path + img_name.replace('png', 'txt')
        calibration_file = test_calib_path + img_name.replace('png', 'txt')

        # write the prediction file
        with open(prediction_file, 'w') as pred_file:
            img = cv2.imread(image_file)
            img = np.array(img, dtype='float32')

            # read the P2 camera projection matrix from the calibration file
            P2 = np.array([])
            for line in open(calibration_file):
                if 'P2' in line:
                    P2 = line.split(' ')
                    P2 = np.asarray([float(x) for x in P2[1:]])
                    P2 = np.reshape(P2, (3, 4))

            for line in open(label_file):
                line = line.strip().split(' ')
                obj = detectionInfo(line)
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)
                if obj.name in cfg().KITTI_cat:
                    # skip degenerate 2D boxes
                    if xmin == xmax or ymin == ymax:
                        continue
                    # crop the 2D detection area
                    patch = img[ymin:ymax, xmin:xmax]
                    # cv2.resize expects (width, height)
                    patch = cv2.resize(patch, (cfg().norm_w, cfg().norm_h))
                    # patch -= np.array([[[103.939, 116.779, 123.68]]])
                    patch /= 255.0
                    # add a batch dimension to match the training input shape
                    patch = np.expand_dims(patch, 0)
                    prediction = model.predict(patch)

                    dim = prediction[0][0]
                    bin_anchor = prediction[1][0]
                    bin_confidence = prediction[2][0]

                    # update with the predicted dimensions
                    dims = dims_avg[obj.name] + dim
                    obj.h, obj.w, obj.l = np.array([round(d, 2) for d in dims])

                    # update with the predicted alpha, [-pi, pi]
                    obj.alpha = recover_angle(bin_anchor, bin_confidence, cfg().bin)

                    # compute global and local orientation
                    obj.rot_global, rot_local = compute_orientaion(P2, obj)

                    # compute and update the translation, (x, y, z)
                    obj.tx, obj.ty, obj.tz = translation_constraints(P2, obj, rot_local)

                    # output prediction label
                    output_line = obj.member_to_list()
                    output_line.append(1.0)
                    # write regressed 3D dimensions and orientation to file
                    output_line = ' '.join([str(item) for item in output_line]) + '\n'
                    pred_file.write(output_line)

        print('Write predicted labels for: ' + img_name)

    end_time = time.time()
    # average processing time per image
    process_time = (end_time - start_time) / len(val_imgs)
    print(process_time)
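# recover_angle is imported from utils.correspondece_constraint and is not
# shown in this excerpt. A minimal sketch of the MultiBin decoding it is
# assumed to perform: pick the most confident bin, decode its (cos, sin)
# offset with arctan2, add the bin centre back, and wrap into [-pi, pi].
def recover_angle_sketch(bin_anchor, bin_confidence, bin_num):
    wedge = 2. * np.pi / bin_num
    bin_id = np.argmax(bin_confidence)
    # bin_anchor[bin_id] is (cos, sin) of the in-bin offset
    offset = np.arctan2(bin_anchor[bin_id][1], bin_anchor[bin_id][0])
    angle = offset + bin_id * wedge
    # wrap from [0, 2pi) into [-pi, pi]
    angle = angle % (2. * np.pi)
    if angle > np.pi:
        angle -= 2. * np.pi
    return angle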
import os
import numpy as np
import cv2
import argparse
import time

from utils.read_dir import ReadDir
from data_processing.KITTI_dataloader import KITTILoader
from utils.correspondece_constraint import *
from my_config import MyConfig as cfg

if cfg().network == 'vgg16':
    from model import vgg16 as nn
if cfg().network == 'mobilenet_v2':
    from model import mobilenet_v2 as nn
import copy

import cv2
import numpy as np

from my_config import MyConfig as cfg


def prepare_input_and_output(train_inst, image_dir):
    '''
    Prepare an image patch for training.
    input:
        train_inst -- annotation of the object to train on
        image_dir -- path to the source image
    output:
        img -- cropped bbox patch
        train_inst['dims'] -- object dimensions
        train_inst['orient'] / train_inst['orient_flipped'] -- object orientation
        train_inst['conf'] / train_inst['conf_flipped'] -- orientation confidence
    '''
    # jitter the 2D box by up to cfg().jit pixels on each side
    xmin = train_inst['xmin'] + np.random.randint(-cfg().jit, cfg().jit + 1)
    ymin = train_inst['ymin'] + np.random.randint(-cfg().jit, cfg().jit + 1)
    xmax = train_inst['xmax'] + np.random.randint(-cfg().jit, cfg().jit + 1)
    ymax = train_inst['ymax'] + np.random.randint(-cfg().jit, cfg().jit + 1)

    img = cv2.imread(image_dir)

    if cfg().jit != 0:
        # clip the jittered box to the image boundaries
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, img.shape[1] - 1)
        ymax = min(ymax, img.shape[0] - 1)

    img = copy.deepcopy(img[ymin:ymax + 1, xmin:xmax + 1]).astype(np.float32)

    # flip the image horizontally with probability 0.5 (binomial draw is 0 or 1)
    flip = np.random.binomial(1, .5)
    if flip > 0.5:
        img = cv2.flip(img, 1)

    # resize the patch to the network input size; cv2.resize expects (width, height)
    img = cv2.resize(img, (cfg().norm_w, cfg().norm_h))
    # subtract the per-channel mean
    # img = img - np.array([[[103.939, 116.779, 123.68]]])
    img /= 255.0

    # pick the orientation and confidence targets that match the flip
    if flip > 0.5:
        return img, train_inst['dims'], train_inst['orient_flipped'], train_inst['conf_flipped']
    else:
        return img, train_inst['dims'], train_inst['orient'], train_inst['conf']
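# Why the flipped targets exist: mirroring an image horizontally negates the
# observation angle, so the local orientation alpha becomes 2*pi - alpha.
# That is exactly the angle orientation_confidence_flip() encodes into
# 'orient_flipped'/'conf_flipped'; this function just selects the matching
# pair. A small illustration with a hypothetical angle, using
# compute_anchors from the data-processing step:
alpha = 0.7
print(compute_anchors(alpha))               # anchors for the original patch
print(compute_anchors(2. * np.pi - alpha))  # anchors for the mirrored patch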
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import regularizers as reg

from my_config import MyConfig as cfg

# l2_normalize is assumed to be defined elsewhere in this module.


def network():
    inputs = layers.Input(shape=(cfg().norm_h, cfg().norm_w, 3))

    # Block 1
    x = layers.Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block1_conv1')(inputs)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block1_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block2_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(128, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block2_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block3_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block3_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block3_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block4_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block4_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block4_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block5_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block5_conv2')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(512, (3, 3), padding='same', kernel_initializer='he_normal',
                      kernel_regularizer=reg.l2(1e-4), name='block5_conv3')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(strides=(2, 2), name='block5_pool')(x)

    # Flatten
    x = layers.Flatten(name='Flatten')(x)

    # Dimensions branch
    dimensions = layers.Dense(512, name='d_fc_1')(x)
    dimensions = layers.LeakyReLU(alpha=0.1)(dimensions)
    dimensions = layers.Dropout(0.5)(dimensions)
    dimensions = layers.Dense(3, name='d_fc_2')(dimensions)
    dimensions = layers.LeakyReLU(alpha=0.1, name='dimensions')(dimensions)

    # Orientation branch
    orientation = layers.Dense(256, name='o_fc_1')(x)
    orientation = layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = layers.Dropout(0.5)(orientation)
    orientation = layers.Dense(cfg().bin * 2, name='o_fc_2')(orientation)
    orientation = layers.LeakyReLU(alpha=0.1)(orientation)
    orientation = layers.Reshape((cfg().bin, -1))(orientation)
    orientation = layers.Lambda(l2_normalize, name='orientation')(orientation)

    # Confidence branch
    confidence = layers.Dense(256, name='c_fc_1')(x)
    confidence = layers.LeakyReLU(alpha=0.1)(confidence)
    confidence = layers.Dropout(0.5)(confidence)
    confidence = layers.Dense(cfg().bin, activation='softmax', name='confidence')(confidence)

    # Build model
    model = tf.keras.Model(inputs, [dimensions, orientation, confidence])
    model.summary()
    return model
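# Both backbones emit the same three heads, so they can share one training
# objective. The orientation head is usually trained with a MultiBin cosine
# loss rather than plain MSE; a minimal sketch under that assumption (this
# repository's actual loss function is not shown in these excerpts):
def orientation_loss(y_true, y_pred):
    # y_true / y_pred: (batch, bin, 2) holding (cos, sin) per bin.
    # Only bins that were assigned an anchor carry a non-zero target,
    # so count them via the squared norm of each target row.
    anchors = tf.reduce_sum(tf.square(y_true), axis=2)
    anchors = tf.greater(anchors, tf.constant(0.5))
    anchors = tf.reduce_sum(tf.cast(anchors, tf.float32), axis=1)

    # cosine of the angular error per bin: cos(t)cos(p) + sin(t)sin(p)
    loss = (y_true[:, :, 0] * y_pred[:, :, 0] +
            y_true[:, :, 1] * y_pred[:, :, 1])
    loss = tf.reduce_sum(loss, axis=1) / anchors

    # maximizing the mean cosine == minimizing 2 - 2 * mean cosine
    return tf.reduce_mean(2. - 2. * loss)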