Exemplo n.º 1
0
def create_pose_estimation_model(pretrained,
                                 dataset,
                                 arch,
                                 load_vgg19=None,
                                 parallel=True,
                                 device_ids=None):
    """Build a pose-estimation network and move it to the target device.

    Args:
        pretrained: If truthy, initialise from pretrained weights. Only the
            'vgg19' architecture has pretrained weights; for the other
            architectures a message is logged and the model keeps its
            default initialisation.
        dataset: Dataset name, compared case-insensitively. Only 'coco' is
            recognised.
        arch: One of 'shufflenetv2', 'vgg19' or 'hourglass'.
        load_vgg19: Optional path to a state dict that is loaded into the
            'vgg19' model. Ignored for the other architectures.
        parallel: If truthy and CUDA is used, wrap the model in
            ``torch.nn.DataParallel``.
        device_ids: Forwarded to ``torch.nn.DataParallel``. The value ``-1``
            forces the model onto the CPU even when CUDA is available.

    Returns:
        The model (wrapped in ``DataParallel`` when requested) on 'cuda' or
        'cpu'.

    Raises:
        ValueError: If ``dataset`` or ``arch`` is not recognised.
    """
    # noinspection PyGlobalUndefined
    global msglogger

    dataset = dataset.lower()
    if dataset != 'coco':
        raise ValueError('Could not recognize dataset {}'.format(dataset))

    if arch == 'shufflenetv2':
        model = rtpose_shufflenetV2.Network(width_multiplier=1.0)
        if pretrained:
            msglogger.info(
                'No pretrained ShuffleNetV2 model available. Init randomly.'
            )
    elif arch == 'vgg19':
        model = rtpose_vgg.get_model(trunk='vgg19')
        if pretrained:
            # Directory handed to use_vgg for the pretrained VGG19 weights.
            model_dir = Path('./pretrained')
            model_dir.mkdir(exist_ok=True)
            rtpose_vgg.use_vgg(model, str(model_dir), 'vgg19')
        if load_vgg19:
            # The model is still on the CPU here, so map the checkpoint to
            # the CPU as well; it is moved to the final device below.
            model.load_state_dict(
                torch.load(load_vgg19, map_location='cpu'))
    elif arch == 'hourglass':
        model = rtpose_hourglass.hg(num_stacks=8,
                                    num_blocks=1,
                                    paf_classes=38,
                                    ht_classes=19)
        if pretrained:
            msglogger.info(
                'No pretrained Hourglass model available. Init randomly.')
    else:
        # Fail early instead of calling .to() on None further down.
        raise ValueError('Could not recognize arch {}'.format(arch))

    msglogger.info("=> creating a %s%s model with the %s dataset" %
                   ('pretrained ' if pretrained else '', arch, dataset))

    if torch.cuda.is_available() and device_ids != -1:
        device = 'cuda'
        if parallel:
            print('Data parallel: device_ids =', device_ids)
            # Keep the wrapper so that the returned model really is parallel.
            model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        device = 'cpu'

    return model.to(device)
Exemplo n.º 2
0
                        368,
                        8,
                        preprocess='vgg',
                        training=False,
                        batch_size=args.batch_size,
                        params_transform=params_transform,
                        shuffle=False,
                        num_workers=4)
# Report the number of samples in the validation loader's dataset.
print('val dataset len: {}'.format(len(valid_data.dataset)))

# Build the network with a VGG19 trunk.
model = get_model(trunk='vgg19')
# Disabled alternative wrapper, kept for reference:
#model = encoding.nn.DataParallelModel(model, device_ids=args.gpu_ids)
# Wrap for multi-GPU execution and move to CUDA.
model = torch.nn.DataParallel(model).cuda()
# Load the pretrained VGG19 weights (called on the wrapped model).
use_vgg(model, args.model_path, 'vgg19')

# Freeze the first 20 entries of `model0` so they are excluded from
# optimisation. The model is wrapped in DataParallel, hence `.module`.
# NOTE(review): the original comment says the weights are released later;
# that step is not visible in this fragment.
for i in range(20):
    for param in model.module.model0[i].parameters():
        param.requires_grad = False

# Only parameters that still require gradients are handed to the optimizer.
trainable_vars = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(trainable_vars,
                            lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay,
                            nesterov=args.nesterov)

# TensorBoard writer that logs into args.logdir.
writer = SummaryWriter(log_dir=args.logdir)
Exemplo n.º 3
0
def _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs,
           restore):
    """Train the VGG19-trunk model for one object class.

    Training scalars are written to TensorBoard at every step. Every 1000
    steps the model is validated with ``_validate`` and, when the summed
    validation loss improves on the best value seen so far, saved as
    ``<class_name>-checkpoint-best.pth``. After the last epoch the weights are
    saved as ``<class_name>-checkpoint-last.pth``.

    Args:
        class_name: Object class forwarded to ``Dataset``; also used as the
            checkpoint file-name prefix.
        path_to_data_dir: Data directory forwarded to ``Dataset``.
        path_to_logs_dir: Directory for TensorBoard logs and checkpoints.
        batch_size: Batch size of the training and validation loaders.
        epochs: Number of passes over the training loader.
        restore: Optional path to a state dict loaded before training; a
            falsy value skips restoring.
    """
    # create tensorboard
    writer = SummaryWriter(path_to_logs_dir)
    try:
        # dataloader
        train_dataset = Dataset(class_name=class_name,
                                path_to_data=path_to_data_dir)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size,
                                      shuffle=True,
                                      num_workers=0,
                                      drop_last=True)
        val_dataset = Dataset(path_to_data=path_to_data_dir,
                              class_name=class_name,
                              split='val')
        val_dataloader = DataLoader(val_dataset,
                                    batch_size,
                                    shuffle=False,
                                    num_workers=0,
                                    drop_last=True)

        # load model
        model = get_model(trunk='vgg19')
        model = model.cuda()
        use_vgg(model, './model', 'vgg19')

        # restore model
        if restore:
            model.load_state_dict(torch.load(restore))

        model.train()

        # freeze low-level layer: the first 20 entries of model0 are excluded
        # from optimisation
        for i in range(20):
            for param in model.model0[i].parameters():
                param.requires_grad = False
        trainable_vars = [
            param for param in model.parameters() if param.requires_grad
        ]
        optimizer = torch.optim.Adam(trainable_vars, lr=0.0001)

        epoch = 0
        step = 1
        # NOTE(review): with this initial value a "best" checkpoint is only
        # written once the validation total drops below 1.0 - confirm that
        # this is intended.
        best_mse = 1.0

        # `<` instead of `!=` so that a negative `epochs` cannot loop forever.
        while epoch < epochs:
            for images, heatmaps_target, pafs_target, _, _ in train_dataloader:
                images = images.cuda()
                _, saved_for_loss = model(images)
                loss, heatmaps_losses, pafs_losses = _loss(
                    saved_for_loss, heatmaps_target.cuda(),
                    pafs_target.cuda())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if step % 10 == 0:
                    print('Epoch: {}, Step: {}, Loss: {}'.format(
                        epoch, step, loss.item()))
                writer.add_scalar('train_total_loss/loss', loss, step)
                for stage, (heatmaps_loss, pafs_loss) in enumerate(
                        zip(heatmaps_losses, pafs_losses)):
                    writer.add_scalar(
                        'train_heatmaps_loss/stage_{}'.format(str(stage)),
                        heatmaps_loss, step)
                    writer.add_scalar(
                        'train_pafs_loss/stage_{}'.format(str(stage)),
                        pafs_loss, step)
                if step % 1000 == 0:
                    pafs_loss, heatmaps_loss = _validate(model,
                                                         val_dataloader)
                    total_loss = pafs_loss + heatmaps_loss
                    print(
                        'Validation Paf MSE: {} Heatmap MSE: {} Total MSE: {}'.
                        format(pafs_loss, heatmaps_loss, total_loss))
                    writer.add_scalar('val/heatmaps_loss', heatmaps_loss,
                                      step)
                    writer.add_scalar('val/pafs_loss', pafs_loss, step)
                    writer.add_scalar('val/total_loss', total_loss, step)
                    if total_loss < best_mse:
                        print('Save checkpoint')
                        torch.save(
                            model.state_dict(),
                            os.path.join(
                                path_to_logs_dir,
                                '{}-checkpoint-best.pth'.format(class_name)))
                        best_mse = total_loss
                        print('Best MSE: {}'.format(total_loss))
                    # Back to training mode; presumably _validate switches
                    # the model to eval mode - TODO confirm.
                    model.train()
                step += 1
            epoch += 1
        print('Save checkpoint')
        torch.save(
            model.state_dict(),
            os.path.join(path_to_logs_dir,
                         '{}-checkpoint-last.pth'.format(class_name)))
    finally:
        # Flush pending events and release the event file even on failure.
        writer.close()
Exemplo n.º 4
0
def main(args):
    """Run the pose model over an image sequence and visualise the result.

    Every image matching ``*.<img_prefix>`` in the data directory is cropped,
    resized to ``Config.crop_size`` and fed to the network. The output is
    passed to ``find_objects``; for each detected object the valid cuboid
    vertices are handed to ``cv2.solvePnP`` and, on success, the predicted
    location is written onto the image and printed, and the re-projected
    cuboid vertices are passed to ``plot``. Images with at least one detected
    object are optionally saved and/or displayed.

    Args:
        args: Namespace providing ``output_dir``, ``path_to_data_dir``,
            ``checkpoint``, ``class_name``, ``fps``, ``img_prefix``, ``save``
            and ``plot``.

    Raises:
        FileNotFoundError: If the data directory, the checkpoint,
            ``_object_settings.json`` or ``_camera_settings.json`` is missing.
        ValueError: If no exported object whose ``class`` contains
            ``class_name`` provides ``cuboid_dimensions``.
    """
    output_dir = args.output_dir
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    path_to_data_dir = args.path_to_data_dir
    if not os.path.exists(path_to_data_dir):
        raise FileNotFoundError(path_to_data_dir)
    path_to_checkpoint = args.checkpoint
    if not os.path.exists(path_to_checkpoint):
        # Report the path that is actually missing.
        raise FileNotFoundError(path_to_checkpoint)
    class_name = args.class_name
    fps = args.fps
    img_prefix = args.img_prefix

    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_seetings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_seetings):
        raise FileNotFoundError(path_to_object_seetings)
    with open(path_to_object_seetings) as settings_file:
        object_list = json.load(settings_file)['exported_objects']
    object_size = None
    for obj in object_list:
        # Substring match; when several entries match, the last one wins.
        if class_name in obj['class']:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_seetings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_seetings):
        raise FileNotFoundError(path_to_camera_seetings)
    # Parse the camera file once; both the intrinsics and the optional
    # distortion coefficients live under the first camera entry.
    with open(path_to_camera_seetings) as settings_file:
        camera_settings = json.load(settings_file)['camera_settings'][0]
    intrinsic_settings = camera_settings['intrinsic_settings']
    # NOTE(review): both principal-point entries are set to max(cx, cy) -
    # confirm this matches the geometry produced by crop().
    principal_point = max(intrinsic_settings['cx'], intrinsic_settings['cy'])
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = principal_point
    matrix_camera[1, 2] = principal_point
    matrix_camera[2, 2] = 1
    try:
        dist_coeffs = np.array(camera_settings["distortion_coefficients"])
    except KeyError:
        # No distortion information available: use all-zero coefficients.
        dist_coeffs = np.zeros((4, 1))
    path_to_sequences = sorted(
        glob.glob(os.path.join(path_to_data_dir, '*.{}'.format(img_prefix))))

    # Side length of the network output grid; vertices outside are invalid.
    grid_size = Config.crop_size / Config.stride

    for img_path in path_to_sequences:
        original_img = crop(cv2.imread(img_path))
        # Scale factor from network-input pixels back to the cropped image.
        ratio = max(original_img.shape[:2]) / Config.crop_size
        img = cv2.resize(original_img, (Config.crop_size, Config.crop_size))
        img = preprocess(img).float()
        img = torch.unsqueeze(img, 0)
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            out, _ = model(img.cuda())
        line, vertex = out[0].squeeze(), out[1].squeeze()
        objects, _ = find_objects(vertex, line)
        original_img = cv2.putText(original_img,
                                   "Class name: {}".format(class_name),
                                   (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1,
                                   (255, 255, 255), 2)

        # NOTE(review): saving and plotting only happen for images with at
        # least one detected object - confirm that skipping empty frames is
        # intended.
        if len(objects) > 0:
            for detected in objects:
                cuboid2d_points = detected[1] + [
                    (detected[0][0] * 8, detected[0][1] * 8)
                ]
                obj_2d_points = []
                obj_3d_points = []

                for i in range(8):
                    check_point_2d = cuboid2d_points[i]
                    # Ignore invalid points
                    if check_point_2d is None:
                        continue
                    if (check_point_2d[0] < 0 or check_point_2d[1] < 0
                            or check_point_2d[0] >= grid_size
                            or check_point_2d[1] >= grid_size):
                        continue
                    obj_2d_points.append(
                        (check_point_2d[0] * Config.stride * ratio,
                         check_point_2d[1] * Config.stride * ratio))
                    obj_3d_points.append(cuboid3d_points[i])

                centroid = tuple([
                    int(point * Config.stride * ratio)
                    for point in detected[0]
                ])
                # NOTE(review): -1 is passed positionally as the colour
                # argument, not as the thickness - confirm this is intended.
                original_img = cv2.circle(original_img, centroid, 5, -1)
                obj_2d_points = np.array(obj_2d_points, dtype=float)
                obj_3d_points = np.array(obj_3d_points, dtype=float)
                if len(obj_2d_points) < 5:
                    # Too few valid vertices: skip pose estimation.
                    continue

                ret, rvec, tvec = cv2.solvePnP(obj_3d_points,
                                               obj_2d_points,
                                               matrix_camera,
                                               dist_coeffs,
                                               flags=cv2.SOLVEPNP_ITERATIVE)
                if not ret:
                    continue

                location = [x[0] for x in tvec]
                quaternion = convert_rvec_to_quaternion(rvec)

                projected_points, _ = cv2.projectPoints(
                    cuboid3d_points, rvec, tvec, matrix_camera, dist_coeffs)
                projected_points = np.squeeze(projected_points)
                x, y, z = location
                original_img = cv2.putText(
                    original_img,
                    "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                    .format(x / 10, y / 10, z / 10), (50, 150),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
                print("Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                      .format(x / 10, y / 10, z / 10))
                # If the location.Z is negative or object is behind the
                # camera then flip both location and rotation
                if z < 0:
                    # Get the opposite location
                    location = [-x, -y, -z]

                    # Change the rotation by 180 degree
                    rotate_angle = np.pi
                    rotate_quaternion = Quaternion.from_axis_rotation(
                        location, rotate_angle)
                    quaternion = rotate_quaternion.cross(quaternion)
                vertexes = [tuple(p) for p in projected_points]
                plot(original_img, vertexes)
            if args.save:
                os.makedirs(output_dir, exist_ok=True)
                # basename() also handles platform-specific separators.
                output_path = os.path.join(output_dir,
                                           os.path.basename(img_path))
                print('=> Save {}'.format(output_path))
                cv2.imwrite(output_path, original_img)
            if args.plot:
                original_img = cv2.resize(original_img, (600, 600))
                cv2.imshow('prediction', original_img)
                cv2.waitKey(int(1000 / fps))
Exemplo n.º 5
0
# Build the validation loader (training=False) with VGG preprocessing.
# NOTE(review): shuffle=True on a validation loader is unusual - confirm.
valid_data = get_loader(json_path,
                        data_dir,
                        mask_dir,
                        inp_size,
                        feat_stride,
                        preprocess='vgg',
                        training=False,
                        batch_size=batch_size,
                        shuffle=True)
print('val dataset len: {}'.format(len(valid_data.dataset)))

# Build the network with a VGG19 trunk, wrap it for multi-GPU execution and
# move it to CUDA.
model = get_model(trunk='vgg19')
model = torch.nn.DataParallel(model).cuda()
# Load the pretrained VGG19 weights (called on the wrapped model).
use_vgg(model, model_path, 'vgg19')

# Freeze the first 20 entries of `model0` so they are excluded from
# optimisation. The model is wrapped in DataParallel, hence `.module`.
# NOTE(review): the original comment says the weights are released later;
# that step is not visible in this fragment.
for i in range(20):
    for param in model.module.model0[i].parameters():
        param.requires_grad = False

# Only parameters that still require gradients are handed to the optimizer.
trainable_vars = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(trainable_vars,
                            lr=init_lr,
                            momentum=momentum,
                            weight_decay=weight_decay,
                            nesterov=nesterov)

for epoch in range(5):
    #adjust_learning_rate(optimizer, epoch)
# Build the validation loader only when validation batches are requested.
# NOTE(review): shuffle=True on a validation loader is unusual - confirm.
if params.val_nbatch > 0:
    valid_data = get_loader(json_path,
                            data_dir,
                            mask_dir,
                            inp_size,
                            feat_stride,
                            preprocess='vgg',
                            training=False,
                            batch_size=params.batch_size,
                            shuffle=True)
    print('val dataset len: {}'.format(len(valid_data.dataset)))

# Build the network with the configured trunk.
model = get_model(trunk=trunk)

# Load the pretrained trunk weights only when no checkpoint is configured.
if params.ckpt is None:
    use_vgg(model, model_path, trunk)

# Freeze the first 20 entries of `model0` so they are excluded from
# optimisation. The model is not wrapped here, so `model0` is accessed
# directly.
# NOTE(review): the original comment says the weights are released later;
# that step is not visible in this fragment.
for i in range(20):
    for param in model.model0[i].parameters():
        param.requires_grad = False

# Only parameters that still require gradients are handed to the optimizer,
# which is stored on `params`.
trainable_vars = [param for param in model.parameters() if param.requires_grad]
params.optimizer = torch.optim.SGD(trainable_vars,
                                   lr=params.init_lr,
                                   momentum=momentum,
                                   weight_decay=weight_decay,
                                   nesterov=nesterov)
Exemplo n.º 7
0
def _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix):
    """Evaluate location accuracy of a checkpoint on the validation split.

    For every validation sample the network output is passed to
    ``find_objects``; each detected object with at least four valid cuboid
    vertices is solved with ``cv2.solvePnP``. A target location counts as
    correct when the closest predicted location (divided by 10) lies within
    the distance threshold, otherwise as wrong. The accuracy is printed.

    Args:
        class_name: Object class; selects the cuboid dimensions and is
            forwarded to ``Dataset``.
        path_to_data_dir: Directory holding ``_object_settings.json``,
            ``_camera_settings.json`` and the data read by ``Dataset``.
        path_to_checkpoint: State dict loaded into the model.
        img_prefix: Forwarded to ``Dataset``.

    Raises:
        FileNotFoundError: If ``_object_settings.json`` or
            ``_camera_settings.json`` is missing.
        ValueError: If no exported object whose ``class`` contains
            ``class_name`` provides ``cuboid_dimensions``.
    """
    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_seetings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_seetings):
        raise FileNotFoundError(path_to_object_seetings)
    with open(path_to_object_seetings) as settings_file:
        object_list = json.load(settings_file)['exported_objects']
    object_size = None
    for obj in object_list:
        # Substring match; when several entries match, the last one wins.
        if class_name in obj['class']:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_seetings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_seetings):
        raise FileNotFoundError(path_to_camera_seetings)
    # Parse the camera file once; both the intrinsics and the optional
    # distortion coefficients live under the first camera entry.
    with open(path_to_camera_seetings) as settings_file:
        camera_settings = json.load(settings_file)['camera_settings'][0]
    intrinsic_settings = camera_settings['intrinsic_settings']
    # NOTE(review): both principal-point entries are set to max(cx, cy) -
    # confirm this matches how the dataset crops its images.
    principal_point = max(intrinsic_settings['cx'], intrinsic_settings['cy'])
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = principal_point
    matrix_camera[1, 2] = principal_point
    matrix_camera[2, 2] = 1

    try:
        dist_coeffs = np.array(camera_settings["distortion_coefficients"])
    except KeyError:
        # No distortion information available: use all-zero coefficients.
        dist_coeffs = np.zeros((4, 1))

    # dataloader
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name,
                          split='val',
                          img_prefix=img_prefix)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=0,
                                drop_last=False)

    correct = 0
    wrong = 0
    # set threshold (cm)
    threshold = 3.0
    # Side length of the network output grid; vertices outside are invalid.
    grid_size = Config.crop_size / Config.stride

    # Wrapping the loader itself lets tqdm show the total number of batches.
    for images, _, _, location_targets, ratio in tqdm(val_dataloader):
        images = images.cuda()
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            output, _ = model(images)
        line, vertex = output[0].squeeze(), output[1].squeeze()
        objects, _ = find_objects(vertex, line)
        location_predictions = []
        for detected in objects:
            cuboid2d_points = detected[1] + [
                (detected[0][0] * 8, detected[0][1] * 8)
            ]
            obj_2d_points = []
            obj_3d_points = []

            for i in range(8):
                check_point_2d = cuboid2d_points[i]
                # Ignore invalid points
                if check_point_2d is None:
                    continue
                if (check_point_2d[0] < 0 or check_point_2d[1] < 0
                        or check_point_2d[0] >= grid_size
                        or check_point_2d[1] >= grid_size):
                    continue
                obj_2d_points.append(
                    (check_point_2d[0] * Config.stride * ratio,
                     check_point_2d[1] * Config.stride * ratio))
                obj_3d_points.append(cuboid3d_points[i])

            obj_2d_points = np.array(obj_2d_points, dtype=np.float32)
            obj_3d_points = np.array(obj_3d_points, dtype=np.float32)
            if len(obj_2d_points) < 4:
                # Too few valid vertices: skip pose estimation.
                continue

            ret, rvec, tvec = cv2.solvePnP(obj_3d_points,
                                           obj_2d_points,
                                           matrix_camera,
                                           dist_coeffs,
                                           flags=cv2.SOLVEPNP_ITERATIVE)
            if not ret:
                # A failed solve yields no location; recording a None here
                # would break the distance computation below.
                continue

            location = [x[0] for x in tvec]
            quaternion = convert_rvec_to_quaternion(rvec)
            # If the location.Z is negative or object is behind the camera
            # then flip both location and rotation
            x, y, z = location
            if z < 0:
                # Get the opposite location
                location = [-x, -y, -z]
                # Change the rotation by 180 degree
                rotate_angle = np.pi
                rotate_quaternion = Quaternion.from_axis_rotation(
                    location, rotate_angle)
                quaternion = rotate_quaternion.cross(quaternion)
            location_predictions.append(location)

        location_predictions = np.array(location_predictions)
        if len(location_targets) == 0:
            # No ground truth for this sample: every prediction is wrong.
            wrong += len(location_predictions)
        else:
            location_targets = location_targets.cpu().data.numpy()[0]
            for location_target in location_targets:
                # Predictions are divided by 10 before comparison,
                # presumably to match the unit of the targets - TODO confirm.
                distances = [
                    np.sqrt(
                        np.sum(
                            np.square(location_target -
                                      location_prediction / 10.0)))
                    for location_prediction in location_predictions
                ]
                if len(distances) == 0 or min(distances) > threshold:
                    wrong += 1
                else:
                    correct += 1

    total = correct + wrong
    # Guard against an empty evaluation set (nothing counted at all).
    accuracy = correct / total * 100.0 if total else 0.0
    print('Object: {} Accuracy: {}%'.format(class_name, accuracy))