def predict(prediction_dir, label_dir, image_dir, calibration_file):
    """Regress a 3D box for every labelled 2D box and write one file per image.

    For each image in ``image_dir`` the label file with the same stem is read
    line by line.  Every object whose class is in ``cfg.KITTI_cat`` is cropped
    from the image, resized, scaled to [0, 1] and passed through the network.
    The predicted dimensions, orientation and translation are stored on the
    label object, which is then serialised (with a trailing ``1.0``) as one
    line of the prediction file.

    Args:
        prediction_dir: directory that receives ``<image stem>.txt`` files.
        label_dir: directory holding the input ``<image stem>.txt`` labels.
        image_dir: directory of input images; names starting with ``.`` are
            ignored.
        calibration_file: text file with a ``P2`` line; the values after the
            first token are reshaped to a 3x4 matrix shared by all images.
    """
    # build the network and load the trained weights
    model = nn.network()
    model.load_weights('3dbox_weights_mob.hdf5')

    dims_avg, _ = KITTILoader(subset='tracklet').get_average_dimension()

    val_imgs = sorted(
        im for im in os.listdir(image_dir) if not im.startswith('.'))

    # P2 stays an empty array when the file has no 'P2' line; if several
    # lines match, the last one wins.
    P2 = np.array([])
    with open(calibration_file) as calib:
        for line in calib:
            if 'P2' in line:
                values = line.split(' ')[1:]
                P2 = np.asarray([float(v) for v in values]).reshape(3, 4)

    for img_name in tqdm(val_imgs):
        # Swap only the extension, so a 'png' elsewhere in the name is kept
        # and non-png images still map to '<stem>.txt'.
        stem = os.path.splitext(img_name)[0]
        image_file = os.path.join(image_dir, img_name)
        label_file = os.path.join(label_dir, stem + '.txt')
        prediction_file = os.path.join(prediction_dir, stem + '.txt')

        # Both handles are closed on exit, including on error.
        with open(prediction_file, 'w') as out_file, \
                open(label_file) as labels:
            img = np.array(cv2.imread(image_file), dtype='float32')

            for line in labels:
                obj = detectionInfo(line.strip().split(' '))
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)

                if obj.name not in cfg.KITTI_cat:
                    continue
                # a zero-width or zero-height box cannot be cropped
                if xmin == xmax or ymin == ymax:
                    continue

                # 2D detection area
                patch = img[ymin:ymax, xmin:xmax]
                try:
                    # NOTE(review): cv2.resize takes (width, height); this
                    # passes (norm_h, norm_w) -- only equivalent when the
                    # two are equal. Confirm against the training pipeline.
                    patch = cv2.resize(patch, (cfg.norm_h, cfg.norm_w))
                except cv2.error:
                    # boxes that cannot be resized are skipped
                    continue
                patch /= 255.0
                # add the batch axis expected by model.predict
                patch = np.expand_dims(patch, 0)

                prediction = model.predict(patch)
                dim_offset = prediction[0][0]
                bin_anchor = prediction[1][0]
                bin_confidence = prediction[2][0]

                # the network output is added to the per-class average
                dims = dims_avg[obj.name] + dim_offset
                obj.h, obj.w, obj.l = np.array([round(d, 2) for d in dims])

                # update with predicted alpha
                obj.alpha = recover_angle(bin_anchor, bin_confidence,
                                          cfg.bin)

                # compute global and local orientation
                obj.rot_global, rot_local = compute_orientaion(P2, obj)

                # compute and update translation (x, y, z)
                obj.tx, obj.ty, obj.tz = translation_constraints(
                    P2, obj, rot_local)

                # serialise the updated label, followed by a constant 1.0
                # (presumably the detection score -- confirm)
                fields = obj.member_to_list()
                fields.append(1.0)
                out_file.write(' '.join(str(item) for item in fields) + '\n')
示例#2
0
def predict(args):
    """Regress 3D boxes for the evaluation images and write prediction files.

    Images are listed from the module-level ``test_image_dir``.  When
    ``args.a == 'training'`` only the tail of the sorted list, starting at
    ``cfg().split`` of its length, is processed; otherwise every entry is.
    Each image has its own calibration file, from which the ``P2`` line is
    parsed into a 3x4 matrix.  For every labelled object whose class is in
    ``cfg().KITTI_cat`` the cropped patch is run through the network and the
    updated label (with a trailing ``1.0``) is written to the prediction
    file.  The average wall-clock time per image is printed at the end.

    Args:
        args: object providing ``w`` (path of the weights to load) and ``a``
            (``'training'`` selects the validation tail of the image list).
    """
    config = cfg()

    # build the network and load the trained weights
    model = nn.network()
    model.load_weights(args.w)

    dims_avg, _ = KITTILoader(subset='training').get_average_dimension()

    # list all the validation images
    all_imgs = sorted(os.listdir(test_image_dir))
    if args.a == 'training':
        val_imgs = all_imgs[int(len(all_imgs) * config.split):]
    else:
        val_imgs = all_imgs

    start_time = time.time()

    for img_name in val_imgs:
        # Swap only the extension, so a 'png' elsewhere in the name is kept.
        stem = os.path.splitext(img_name)[0]
        # NOTE(review): plain concatenation -- the directory globals are
        # presumably expected to end with a path separator.
        image_file = test_image_dir + img_name
        label_file = test_label_dir + stem + '.txt'
        prediction_file = prediction_path + stem + '.txt'
        calibration_file = test_calib_path + stem + '.txt'

        # write the prediction file
        with open(prediction_file, 'w') as out_file:
            img = np.array(cv2.imread(image_file), dtype='float32')

            # P2 stays empty when the calibration file has no 'P2' line
            P2 = np.array([])
            with open(calibration_file) as calib:
                for line in calib:
                    if 'P2' in line:
                        values = line.split(' ')[1:]
                        P2 = np.asarray(
                            [float(v) for v in values]).reshape(3, 4)

            with open(label_file) as labels:
                for line in labels:
                    obj = detectionInfo(line.strip().split(' '))
                    xmin = int(obj.xmin)
                    xmax = int(obj.xmax)
                    ymin = int(obj.ymin)
                    ymax = int(obj.ymax)

                    if obj.name not in config.KITTI_cat:
                        continue
                    # a zero-width or zero-height box cannot be cropped
                    if xmin == xmax or ymin == ymax:
                        continue

                    # 2D detection area
                    patch = img[ymin:ymax, xmin:xmax]
                    # NOTE(review): cv2.resize takes (width, height); this
                    # passes (norm_h, norm_w) -- only equivalent when the
                    # two are equal. Confirm against the training pipeline.
                    patch = cv2.resize(patch,
                                       (config.norm_h, config.norm_w))
                    # subtract fixed per-channel offsets
                    patch -= np.array([[[103.939, 116.779, 123.68]]])
                    # add the batch axis expected by model.predict
                    patch = np.expand_dims(patch, 0)

                    prediction = model.predict(patch)
                    dim_offset = prediction[0][0]
                    bin_anchor = prediction[1][0]
                    bin_confidence = prediction[2][0]

                    # the network output is added to the per-class average
                    dims = dims_avg[obj.name] + dim_offset
                    obj.h, obj.w, obj.l = np.array(
                        [round(d, 2) for d in dims])

                    # update with predicted alpha
                    obj.alpha = recover_angle(bin_anchor, bin_confidence,
                                              config.bin)

                    # compute global and local orientation
                    obj.rot_global, rot_local = compute_orientaion(P2, obj)

                    # compute and update translation (x, y, z)
                    obj.tx, obj.ty, obj.tz = translation_constraints(
                        P2, obj, rot_local)

                    # serialise the updated label, followed by a constant
                    # 1.0 (presumably the detection score -- confirm)
                    fields = obj.member_to_list()
                    fields.append(1.0)
                    out_file.write(
                        ' '.join(str(item) for item in fields) + '\n')
                    print('Write predicted labels for: ' + str(img_name))

    elapsed = time.time() - start_time
    # an empty image list would otherwise divide by zero
    process_time = elapsed / len(val_imgs) if val_imgs else 0.0
    print(process_time)
示例#3
0
def train():
    """Train the multi-task network with Adam and a step-decayed learning rate.

    The training samples come from ``KITTILoader(subset='training')`` after
    ``orientation_confidence_flip``.  They are split at ``cfg().split`` into a
    training and a validation part, each wrapped in ``data_gen``.  Weights are
    checkpointed every epoch, TensorBoard logs go to ``logs/``, and every 10th
    epoch (except epoch 0) the learning rate is multiplied by 0.8.
    """
    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    # Built for parity with the other callbacks, but not passed to
    # fit_generator below.
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        mode='min',
        verbose=1,
    )
    # save_best_only=False with period=1: one weights file per epoch
    checkpoint = callbacks.ModelCheckpoint(
        'model{epoch:08d}.hdf5',
        monitor='val_loss',
        verbose=1,
        save_best_only=False,
        mode='min',
        period=1,
    )
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/',
        histogram_freq=0,
        write_graph=True,
        write_images=False,
    )

    # train / validation split
    total = len(samples)
    split_at = int(cfg().split * total)

    train_gen = data_gen(samples[:split_at])
    valid_gen = data_gen(samples[split_at:total])

    print("READY FOR TRAINING")

    # number of batches needed to cover each part once
    train_steps = int(np.ceil(split_at / cfg().batch_size))
    valid_steps = int(np.ceil((total - split_at) / cfg().batch_size))

    adam = optimizer.Adam(lr=0.0001)

    # multi-task learning: one loss per named output head
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'categorical_crossentropy',
    }
    head_weights = {'dimensions': 1., 'orientation': 10., 'confidence': 5.}
    model.compile(optimizer=adam,
                  loss=head_losses,
                  loss_weights=head_weights)

    print("####################################################")
    print(K.get_value(model.optimizer.lr))

    def scheduler(epoch):
        """Scale the optimizer's learning rate by 0.8 on every 10th epoch."""
        if epoch == 0 or epoch % 10 != 0:
            return K.get_value(model.optimizer.lr)
        decayed = K.get_value(model.optimizer.lr) * .8
        K.set_value(model.optimizer.lr, decayed)
        print("lr changed to {}".format(decayed))
        print("lr = ", K.get_value(model.optimizer.lr))
        return K.get_value(model.optimizer.lr)

    lr_sched = callbacks.LearningRateScheduler(scheduler)

    model.fit_generator(
        generator=train_gen,
        validation_data=valid_gen,
        steps_per_epoch=train_steps,
        validation_steps=valid_steps,
        epochs=500,
        verbose=1,
        shuffle=True,
        max_queue_size=3,
        callbacks=[checkpoint, tensorboard, lr_sched],
    )
示例#4
0
def predict(args):
    """Predict object yaw per labelled 2D box and display the drawn 3D boxes.

    Images are listed from the module-level ``test_image_dir``.  When
    ``args.a == 'training'`` only the tail of the sorted list, starting at
    ``cfg().split`` of its length, is processed; otherwise every entry is.
    For every labelled object whose class is in ``cfg().KITTI_cat`` the
    cropped patch is run through the network, the yaw is recovered from the
    most confident orientation bin, and ``gen_3D_box`` / ``draw_3D_box`` are
    called with the image.  Each image is then shown in a window that blocks
    on ``cv2.waitKey(0)``.  The average wall-clock time per image (including
    that wait) is printed at the end.

    Args:
        args: object providing ``w`` (path of the weights to load) and ``a``
            (``'training'`` selects the validation tail of the image list).

    Raises:
        ValueError: if an image's calibration file has no ``P2`` line.
    """
    config = cfg()

    # build the network and load the trained weights
    model = nn.network()
    model.load_weights(args.w)

    dims_avg, _ = KITTILoader(subset='training').get_average_dimension()
    print("dims_avg = ", dims_avg)

    # list all the validation images
    all_imgs = sorted(os.listdir(test_image_dir))
    if args.a == 'training':
        val_imgs = all_imgs[int(len(all_imgs) * config.split):]
    else:
        val_imgs = all_imgs

    start_time = time.time()

    for img_name in val_imgs:
        # Swap only the extension, so a 'png' elsewhere in the name is kept.
        stem = os.path.splitext(img_name)[0]
        # NOTE(review): plain concatenation -- the directory globals are
        # presumably expected to end with a path separator.
        image_file = test_image_dir + img_name
        label_file = test_label_dir + stem + '.txt'
        prediction_file = prediction_path + stem + '.txt'
        calibration_file = test_calib_path + stem + '.txt'

        # NOTE(review): nothing is written here; the prediction file is only
        # created/truncated, as before. Confirm this is intended.
        with open(prediction_file, 'w'):
            pass

        img = np.array(cv2.imread(image_file), dtype='float32')

        P2 = np.array([])
        with open(calibration_file) as calib:
            for line in calib:
                if 'P2' in line:
                    values = line.split(' ')[1:]
                    P2 = np.asarray([float(v) for v in values]).reshape(3, 4)
        if P2.size == 0:
            raise ValueError('no P2 entry found in ' + calibration_file)

        # Camera intrinsics come from this image's own projection matrix.
        # Assumes P2 uses the usual pinhole layout (fx at [0][0], principal
        # point x at [0][2]) -- TODO confirm against the calibration format.
        fx = P2[0][0]
        u0 = P2[0][2]

        with open(label_file) as labels:
            for line in labels:
                obj = detectionInfo(line.strip().split(' '))
                xmin = int(obj.xmin)
                xmax = int(obj.xmax)
                ymin = int(obj.ymin)
                ymax = int(obj.ymax)

                if obj.name not in config.KITTI_cat:
                    continue
                # a zero-width or zero-height box cannot be cropped
                if xmin == xmax or ymin == ymax:
                    continue

                # builtin float replaces the removed np.float alias
                box_2D = np.asarray([xmin, ymin, xmax, ymax], dtype=float)

                # 2D detection area of the RGB image
                patch = img[ymin:ymax, xmin:xmax]
                # NOTE(review): cv2.resize takes (width, height); this
                # passes (norm_h, norm_w) -- only equivalent when the two
                # are equal. Confirm against the training pipeline.
                patch = cv2.resize(patch, (config.norm_h, config.norm_w))
                # subtract fixed per-channel offsets
                patch -= np.array([[[103.939, 116.779, 123.68]]])
                # add the batch axis expected by model.predict
                patch = np.expand_dims(patch, 0)

                prediction = model.predict([patch])

                # the network output is added to the per-class average
                dims = dims_avg[obj.name] + prediction[0][0]

                # Angle of the ray through the box centre.  For fx > 0,
                # arctan2 gives the same value in (0, pi) as
                # arctan(fx / dx) with +pi added for negative results, and
                # it cannot divide by zero when the centre sits on u0.
                box2d_center_x = (xmin + xmax) / 2.0
                theta_ray = np.arctan2(fx, box2d_center_x - u0)

                # most confident orientation bin and its two anchor values
                max_anc = np.argmax(prediction[2][0])
                anchors = prediction[1][0][max_anc]

                # anchors[0] is fed to arccos; the sign comes from anchors[1]
                if anchors[1] > 0:
                    angle_offset = np.arccos(anchors[0])
                else:
                    angle_offset = -np.arccos(anchors[0])

                # each bin spans an equal wedge of the full circle
                bin_num = prediction[2][0].shape[0]
                wedge = 2. * np.pi / bin_num
                theta_loc = angle_offset + max_anc * wedge

                theta = theta_loc + theta_ray
                # object's yaw angle
                yaw = np.pi / 2 - theta

                points2D = gen_3D_box(yaw, dims, P2, box_2D)
                draw_3D_box(img, points2D)

        # imshow multiplies floating-point pixels by 255, so the 0-255
        # float32 image is converted to uint8 for display.
        cv2.imshow('f', img.astype(np.uint8))
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    elapsed = time.time() - start_time
    # an empty image list would otherwise divide by zero
    process_time = elapsed / len(val_imgs) if val_imgs else 0.0
    print(process_time)
示例#5
0
def train():
    """Train the multi-task network with SGD, early stopping and checkpoints.

    The training samples come from ``KITTILoader(subset='training')`` after
    ``orientation_confidence_flip``.  They are split at ``cfg().split`` into a
    training and a validation part, each wrapped in ``data_gen``.  The
    weights with the lowest ``val_loss`` are kept in
    ``3dbox_weights_mob.hdf5`` and TensorBoard logs go to ``logs/``.
    """
    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    # stop once val_loss has not improved by 0.001 for 10 epochs
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0.001, patience=10, mode='min',
        verbose=1)
    # save_best_only=True: the file is only overwritten on a better val_loss
    checkpoint = callbacks.ModelCheckpoint(
        '3dbox_weights_mob.hdf5', monitor='val_loss', verbose=1,
        save_best_only=True, mode='min', period=1)
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/', histogram_freq=0, write_graph=True,
        write_images=False)

    # train / validation split
    total = len(samples)
    split_at = int(cfg().split * total)

    train_gen = data_gen(samples[:split_at])
    valid_gen = data_gen(samples[split_at:total])

    # number of batches needed to cover each part once
    train_steps = int(np.ceil(split_at / cfg().batch_size))
    valid_steps = int(np.ceil((total - split_at) / cfg().batch_size))

    sgd = optimizer.SGD(lr=0.0001, momentum=0.9)

    # multi-task learning: one loss per named output head
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'binary_crossentropy',
    }
    head_weights = {'dimensions': 1., 'orientation': 10., 'confidence': 5.}
    model.compile(optimizer=sgd,
                  loss=head_losses,
                  loss_weights=head_weights)
    # author's recorded values: d:0.0088 o:0.0042, c:0.0098

    model.fit_generator(
        generator=train_gen,
        validation_data=valid_gen,
        steps_per_epoch=train_steps,
        validation_steps=valid_steps,
        epochs=500,
        verbose=1,
        shuffle=True,
        max_queue_size=3,
        callbacks=[early_stop, checkpoint, tensorboard],
    )
示例#6
0
def train():
    """Train the multi-task network with SGD, then export it in three formats.

    The training samples come from ``KITTILoader(subset='training')`` after
    ``orientation_confidence_flip``.  They are split at ``cfg().split`` into a
    training and a validation part, each wrapped in ``data_gen``.  During
    training the weights with the lowest ``val_loss`` are checkpointed.  After
    training the model is written as a SavedModel directory, as an ``.h5``
    file, and as a binary ``.pb`` graph whose variables were converted to
    constants.  All output names embed ``cfg().norm_h`` and ``cfg().norm_w``.
    """
    loader = KITTILoader(subset='training')
    dim_avg, _ = loader.get_average_dimension()
    samples = orientation_confidence_flip(loader.image_data, dim_avg)

    model = nn.network()

    # stop once val_loss has not improved by 0.001 for 10 epochs
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0.001, patience=10, mode='min',
        verbose=1)
    # save_best_only=True: the file is only overwritten on a better val_loss
    checkpoint_path = '3dbox_mbnv2_{}x{}_float32.hdf5'.format(
        cfg().norm_h, cfg().norm_w)
    checkpoint = callbacks.ModelCheckpoint(
        checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True,
        mode='min', period=1)
    tensorboard = callbacks.TensorBoard(
        log_dir='logs/', histogram_freq=0, write_graph=True,
        write_images=False)

    # train / validation split
    total = len(samples)
    split_at = int(cfg().split * total)

    train_gen = data_gen(samples[:split_at])
    valid_gen = data_gen(samples[split_at:total])

    # number of batches needed to cover each part once
    train_steps = int(np.ceil(split_at / cfg().batch_size))
    valid_steps = int(np.ceil((total - split_at) / cfg().batch_size))

    sgd = optimizer.SGD(lr=0.0001, momentum=0.9)

    # multi-task learning: one loss per named output head
    head_losses = {
        'dimensions': 'mean_squared_error',
        'orientation': orientation_loss,
        'confidence': 'binary_crossentropy',
    }
    head_weights = {'dimensions': 1., 'orientation': 10., 'confidence': 5.}
    model.compile(optimizer=sgd,
                  loss=head_losses,
                  loss_weights=head_weights)
    # author's recorded values: d:0.0088 o:0.0042, c:0.0098

    model.fit_generator(
        generator=train_gen,
        validation_data=valid_gen,
        steps_per_epoch=train_steps,
        validation_steps=valid_steps,
        epochs=500,
        verbose=1,
        shuffle=True,
        max_queue_size=3,
        callbacks=[early_stop, checkpoint, tensorboard],
    )

    # export 1: SavedModel directory
    saved_model_dir = 'saved_model_{}x{}'.format(cfg().norm_h, cfg().norm_w)
    tf.saved_model.save(model, saved_model_dir)

    # export 2: single-file Keras model
    h5_path = '3dbox_mbnv2_{}x{}_float32.h5'.format(cfg().norm_h,
                                                    cfg().norm_w)
    model.save(h5_path)

    # export 3: trace the model on its first input's shape/dtype, convert the
    # variables to constants and write the resulting graph in binary form
    first_input = model.inputs[0]
    input_spec = tf.TensorSpec(first_input.shape, first_input.dtype)
    traced = tf.function(lambda inputs: model(inputs))
    concrete = traced.get_concrete_function(inputs=(input_spec))
    frozen_func = convert_variables_to_constants_v2(
        concrete, lower_control_flow=False)
    # the returned graph definition is not used
    frozen_func.graph.as_graph_def()
    pb_name = "3dbox_mbnv2_{}x{}_float32.pb".format(cfg().norm_h,
                                                    cfg().norm_w)
    tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                      logdir=".",
                      name=pb_name,
                      as_text=False)