Example #1
def initialize_model():
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2)
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                                map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    return (yolo, model, averages, angle_bins)
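A one-line usage sketch, using only the names from the return statement above:

# Sketch: unpack the 2D detector, the orientation/dimension regressor,
# the per-class dimension averages, and the angle bins.
yolo, model, averages, angle_bins = initialize_model()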
Example #2
    def __init__(self):
        super(SpineModelPAF, self).__init__()
        self.pcm_n = 2
        self.paf_n = 1

        import torchvision.models.vgg as vgg
        vgg19 = vgg.vgg19_bn(pretrained=False)
        top_layers = list(list(vgg19.children())[0].children())
        top_layers[0] = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        tops = top_layers[:33]  # first 33 layers: (conv + bn + relu) * 10 plus 3 max pools
        tops.pop(26)  # delete the third max pool
        tops.extend(self.make_conv_layers(512, 256))
        tops.extend(self.make_conv_layers(256, 128))
        self.model_0 = nn.Sequential(*tops)  # out: 32, 94

        s1_pcm = lambda: self.stage1(self.pcm_n)
        s1_paf = lambda: self.stage1(self.paf_n)
        sn_pcm = lambda: self.stageN(self.pcm_n)
        sn_paf = lambda: self.stageN(self.paf_n)

        self.model1_1 = s1_pcm()
        self.model1_2 = s1_paf()

        self.model2_1 = sn_pcm()
        self.model2_2 = sn_paf()

        self.model3_1 = sn_pcm()
        self.model3_2 = sn_paf()

        self.model4_1 = sn_pcm()
        self.model4_2 = sn_paf()

        self.model5_1 = sn_pcm()
        self.model5_2 = sn_paf()
Example #3
 def __init__(self,
              feature_layer=34,
              use_bn=False,
              use_input_norm=True,
              device=torch.device('cpu'),
              z_norm=False): #Note: PPON uses cuda instead of CPU
     super(VGGFeatureExtractor, self).__init__()
     if use_bn:
         model = vgg.vgg19_bn(pretrained=True)
     else:
         model = vgg.vgg19(pretrained=True)
     self.use_input_norm = use_input_norm
     if self.use_input_norm:
         if z_norm: # if input in range [-1,1]
             mean = torch.Tensor([0.485-1, 0.456-1, 0.406-1]).view(1, 3, 1, 1).to(device) 
             std = torch.Tensor([0.229*2, 0.224*2, 0.225*2]).view(1, 3, 1, 1).to(device)
         else: # input in range [0,1]
             mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)                 
             std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
         self.register_buffer('mean', mean)
         self.register_buffer('std', std)
     self.features = nn.Sequential(*list(model.features.children())[:(feature_layer + 1)])
     # freeze the feature extractor: no backprop into these parameters
     for k, v in self.features.named_parameters():
         v.requires_grad = False
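A quick sanity check, not part of the original snippet: the shifted statistics in the z_norm branch are equivalent to first mapping x from [-1, 1] back to [0, 1] and then applying the standard ImageNet normalization, since (x - (m - 1)) / (2s) = ((x + 1)/2 - m) / s.

import torch

m, s = 0.485, 0.229                      # one channel's ImageNet mean/std
x = torch.rand(8) * 2 - 1                # inputs in [-1, 1]
lhs = (x - (m - 1)) / (2 * s)            # the z_norm branch above
rhs = ((x + 1) / 2 - m) / s              # rescale to [0, 1], then normalize
assert torch.allclose(lhs, rhs)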
Example #4
def vgg(**config):
    dataset = config.pop('dataset', 'imagenet')
    depth = config.pop('depth', 16)
    bn = config.pop('bn', True)

    if dataset == 'imagenet':
        config.setdefault('num_classes', 1000)
        if depth == 11:
            if bn is False:
                return vgg11(pretrained=False, **config)
            else:
                return vgg11_bn(pretrained=False, **config)
        if depth == 13:
            if bn is False:
                return vgg13(pretrained=False, **config)
            else:
                return vgg13_bn(pretrained=False, **config)
        if depth == 16:
            if bn is False:
                return vgg16(pretrained=False, **config)
            else:
                return vgg16_bn(pretrained=False, **config)
        if depth == 19:
            if bn is False:
                return vgg19(pretrained=False, **config)
            else:
                return vgg19_bn(pretrained=False, **config)
    elif dataset == 'cifar10':
        config.setdefault('num_classes', 10)
    elif dataset == 'cifar100':
        config.setdefault('num_classes', 100)
    config.setdefault('batch_norm', bn)
    return VGG(model_name[depth], **config)
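A hedged usage sketch of the factory above; `VGG` and `model_name` are defined elsewhere in the surrounding module, so this illustrates the intended call rather than verified output.

# CIFAR-10 skips the imagenet branch, sets num_classes=10 and batch_norm=True,
# and falls through to the VGG(model_name[16], ...) constructor.
net = vgg(dataset='cifar10', depth=16, bn=True)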
Example #5
    def __init__(self):
        super(VGG19_bn,self).__init__()
        self.vgg = vgg19_bn(pretrained=True)

        self.encoder_1 = self.vgg.features[0:6]
        self.encoder_2 = self.vgg.features[6:13]
        self.encoder_3 = self.vgg.features[13:26]
        self.encoder_4 = self.vgg.features[26:39]
        self.encoder_5 = self.vgg.features[39:-1]
Example #6
    def __init__(self):
        rospy.loginfo("pointcloud object detection is running...")

        # frame size
        self.frame_x = 640
        self.frame_y = 480

        self.bridge = CvBridge()

        # cv_image and pcl variables
        self.cv_image = np.zeros([self.frame_y, self.frame_x])  # (rows, cols) = (height, width)
        self.pcl = None

        # transform config
        # self.tf_pub = tf.TransformBroadcaster()

        # load torch
        weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
        model_lst = [
            x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
        ]
        if len(model_lst) == 0:
            print('No previous model found, please train first!')
            exit()
        else:
            print('Using previous model %s' % model_lst[-1])
            my_vgg = vgg.vgg19_bn(pretrained=True)
            self.model = Model.Model(features=my_vgg.features, bins=2).cuda()
            checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.model.eval()

        # load yolo
        yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
        self.yolo = cv_Yolo(yolo_path)

        self.averages = ClassAverages.ClassAverages()

        # TODO: clean up how this is done. flag?
        self.angle_bins = generate_bins(2)

        calib_path = os.path.abspath(
            os.path.dirname(__file__)) + "/" + "camera_cal/"
        self.calib_file = calib_path + "calib_cam_to_cam.txt"

        # subscribers
        self.img_sub = rospy.Subscriber("/kitti/camera_color_right/image_raw",
                                        Image, self.rgb_callback)
        #self.pcl_sub = rospy.Subscriber("/camera/depth_registered/points", PointCloud2, self.pcl_callback)
        # publishers
        self.img_detected_pub = rospy.Publisher(
            "ROS_3D_BBox/img_detected_frame", Image, queue_size=100)
        self.location_pub = rospy.Publisher("ROS_3D_BBox/location_array",
                                            LocationArray,
                                            queue_size=100)
        self.rate = rospy.Rate(1)
Example #7
    def __init__(self, weights_path='../rotation_detector/weights'):

        # load yolo
        self.yolo = cv_Yolo(weights_path)

        # load rotation model
        model_lst = [
            x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
        ]
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True).cpu()
        self.model = Model.Model(features=my_vgg.features, bins=2).cpu()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                                map_location=torch.device('cpu'))
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.angle_bins = generate_bins(2)
Example #8
def vgg_19(batch_norm=True, pretrained=False, fixed_feature=True):
    """ VGG 19-layer model from torchvision's vgg model.

	:param batch_norm: train model with batch normalization
	:param pretrained: if true, return a model pretrained on ImageNet
	:param fixed_feature: if true and pretrained is true, model features are fixed while training.
	"""
    if batch_norm:
        from torchvision.models.vgg import vgg19_bn
        model = vgg19_bn(pretrained)
    else:
        from torchvision.models.vgg import vgg19
        model = vgg19(pretrained)

    ff = pretrained and fixed_feature
    return _VGG(model, model.features, ff)
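A minimal usage sketch; `_VGG` is a wrapper class defined elsewhere in the source repository, so this only shows the intended entry point.

# Pretrained backbone with frozen features (ff=True), e.g. for fine-tuning a head.
model = vgg_19(batch_norm=True, pretrained=True, fixed_feature=True)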
Example #9
    def init_vgg19_params(self):

        vgg19 = vgg.vgg19_bn(pretrained=True)

        blocks = [self.down1, self.down2, self.down3, self.down4, self.down5]

        features = list(vgg19.features.children())

        vgg_layers = []
        for _layer in features:
            if isinstance(_layer, nn.Conv2d):
                vgg_layers.append(_layer)
            elif isinstance(_layer, nn.BatchNorm2d):
                vgg_layers.append(_layer)

        merged_layers = []
        for idx, conv_block in enumerate(blocks):
            if idx < 2:
                units = [conv_block.conv1.cbr_unit, conv_block.conv2.cbr_unit]
            else:
                units = [
                    conv_block.conv1.cbr_unit,
                    conv_block.conv2.cbr_unit,
                    conv_block.conv3.cbr_unit,
                    conv_block.conv4.cbr_unit,
                ]
            for _unit in units:
                for _layer in _unit:
                    if isinstance(_layer, nn.Conv2d):
                        merged_layers.append(_layer)
                    elif isinstance(_layer, nn.BatchNorm2d):
                        merged_layers.append(_layer)

        assert len(vgg_layers) == len(merged_layers)

        for l1, l2 in zip(vgg_layers, merged_layers):
            if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
                assert l1.weight.size() == l2.weight.size()
                assert l1.bias.size() == l2.bias.size()
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
            elif isinstance(l1, nn.BatchNorm2d) and isinstance(
                    l2, nn.BatchNorm2d):
                l2.running_mean.data = l1.running_mean.data
                l2.running_var.data = l1.running_var.data
                l2.weight.data = l1.weight.data
                l2.bias.data = l1.bias.data
Example #10
    def __init__(self,
                 subtype='vgg16',
                 out_stages=[2, 3, 4],
                 backbone_path=None,
                 pretrained=False):
        super(VGG, self).__init__()
        self.out_stages = out_stages
        self.backbone_path = backbone_path
        self.pretrained = pretrained

        if subtype == 'vgg11':
            self.pretrained = True
            features = vgg11_bn(pretrained=self.pretrained).features
            self.out_channels = [64, 128, 256, 512, 512]
        elif subtype == 'vgg13':
            self.pretrained = True
            features = vgg13_bn(pretrained=self.pretrained).features
            self.out_channels = [64, 128, 256, 512, 512]
        elif subtype == 'vgg16':
            self.pretrained = True
            features = vgg16_bn(pretrained=self.pretrained).features
            self.out_channels = [64, 128, 256, 512, 512]
        elif subtype == 'vgg19':
            self.pretrained = True
            features = vgg19_bn(pretrained=self.pretrained).features
            self.out_channels = [64, 128, 256, 512, 512]
        else:
            raise NotImplementedError

        self.out_channels = self.out_channels[self.out_stages[0]:self.out_stages[-1] + 1]

        self.conv1 = nn.Sequential(*list(features.children())[:7])
        self.layer1 = nn.Sequential(*list(features.children())[7:14])
        self.layer2 = nn.Sequential(*list(features.children())[14:24])
        self.layer3 = nn.Sequential(*list(features.children())[24:34])
        self.layer4 = nn.Sequential(*list(features.children())[34:43])

        if not self.pretrained:
            if self.backbone_path:
                self.pretrained = True
                self.backbone.load_state_dict(torch.load(self.backbone_path))
            else:
                self.init_weights()
Example #11
 def __init__(self):
     super(ColorizationNetwork_L, self).__init__()
     self.conv1 = nn.Conv2d(in_channels=1,
                            out_channels=64,
                            kernel_size=(3, 3),
                            stride=(1, 1),
                            padding=(1, 1))
     self.VGG_19 = vgg.vgg19_bn(pretrained=True)  #[batch, 256, 56*, 56*]
     self.VGG_19.classifier = nn.Sequential()
     self.l = list(self.VGG_19.features.children())
     del self.l[52]
     del self.l[39]
     del self.l[0]
     self.VGG_modified = nn.Sequential(*self.l)
     self.conv_8 = nn.Sequential(
         nn.ConvTranspose2d(in_channels=512,
                            out_channels=256,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            dilation=1),
         nn.ReLU(inplace=True),  #[batch, 128, 56*, 56*]
         nn.Conv2d(in_channels=256,
                   out_channels=256,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   dilation=1),
         nn.ReLU(inplace=True),  #[batch, 128, 56*, 56*]
         nn.Conv2d(in_channels=256,
                   out_channels=256,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   dilation=1),
         nn.ReLU(inplace=True),  #[batch, 128, 56*, 56*]
         nn.Conv2d(in_channels=256,
                   out_channels=313,
                   kernel_size=1,
                   stride=1,
                   dilation=1)
         #[batch, 313, 56*, 56*]
     )
Example #12
def make_compatible_VGG(input_shape):
    '''Creates a modified VGG_19 batch-normalized network usable for binary classification;
        this CNN is compatible with arbitrarily sized images as input
        (as opposed to the 224x224 images originally used by VGG)

    Parameters:
        input_shape: Desired shape of input image

    Returns:
        torch.nn.Module 19-layer VGG model; this model includes batch normalization
            and is derived from the pretrained torch VGG model
    '''

    base_nn = vgg.vgg19_bn(
        pretrained=True
    )  # Setting num_classes here blows up importing pretrained model

    # Freeze the parameters for the convolutional part of the network; will be modifying only fully-connected layers
    for param in base_nn.features.parameters():
        param.requires_grad = False

    # Modify dimensions of first linear layer based on input image
    # First calculate final feature map size after several max pools (VGG's conv2ds don't change size)
    out_size = []
    for dim_size in input_shape:
        size = dim_size
        for i in range(5):
            size -= 2
            size /= 2
            size = int(size) + 1
        out_size.append(size)

    base_nn.classifier[0] = nn.Linear(512 * out_size[0] * out_size[1], 4096)
    nn.init.normal_(base_nn.classifier[0].weight, 0, 0.01)
    nn.init.constant_(base_nn.classifier[-1].bias, 0)

    # Now set the number of classes
    base_nn.classifier[-1] = nn.Linear(4096, 2)
    nn.init.normal_(base_nn.classifier[-1].weight, 0, 0.01)
    nn.init.constant_(base_nn.classifier[-1].bias, 0)

    return base_nn
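A usage sketch with an assumed input shape: each of the five 2x2/stride-2 max pools halves both spatial dimensions, so a 256x320 input reaches the classifier as a 512 x 8 x 10 feature map, matching the size loop above.

# Sketch only: input_shape is the (height, width) the images will actually have.
model = make_compatible_VGG((256, 320))
# classifier[0] is now Linear(512 * 8 * 10, 4096); classifier[-1] emits 2 logits.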
Example #13
    def __init__(self, num_classes, pretrained=True):
        super(SegNet, self).__init__()
        vgg = vgg19_bn(pretrained=pretrained)

        features = list(vgg.features.children())
        self.enc1 = nn.Sequential(*features[0:7])
        self.enc2 = nn.Sequential(*features[7:14])
        self.enc3 = nn.Sequential(*features[14:27])
        self.enc4 = nn.Sequential(*features[27:40])
        self.enc5 = nn.Sequential(*features[40:])

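        # NB: Python list multiplication repeats references to the same module
        # objects, so the four conv/bn/relu repeats below share one parameter set.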
        self.dec5 = nn.Sequential(*([nn.Upsample(scale_factor=2)] + [
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        ] * 4))
        self.dec4 = _DecoderBlock(1024, 256, 4, last=False)
        self.dec3 = _DecoderBlock(512, 128, 4, last=False)
        self.dec2 = _DecoderBlock(256, 64, 2, last=False)
        self.dec1 = _DecoderBlock(128, num_classes, 2, last=True)
        initialize_weights(self.dec5, self.dec4, self.dec3, self.dec2,
                           self.dec1)
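A small check, written for this listing rather than taken from the source repo: in torchvision's vgg19_bn the MaxPool2d layers sit at feature indices 6, 13, 26, 39 and 52, so each encoder slice above ends just past a pool.

import torch.nn as nn
from torchvision.models import vgg19_bn

feats = list(vgg19_bn(pretrained=False).features.children())
pool_idx = [i for i, m in enumerate(feats) if isinstance(m, nn.MaxPool2d)]
print(pool_idx)  # [6, 13, 26, 39, 52]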
Example #14
 def test_vgg19_bn(self):
     # VGG 19-layer model (configuration 'E') with batch normalization
     x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
     self.exportTest(toC(vgg19_bn()), toC(x))
Example #15
def main():

    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(os.path.dirname(__file__)) + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = os.path.abspath(os.path.dirname(__file__)) + "/" + cal_dir
    calib_file = calib_path + "calib_cam_to_cam_custom.txt"

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:

        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        # calib_file = calib_path + id + ".txt"
        pad_image = False
        if pad_image:
            truth_img = cv2.imread(img_file)
            truth_img = cv2.resize(truth_img, (374, 374))
            height, width, channels = truth_img.shape
            width_pad = 1242
            height_pad = 375
            center_height = height_pad // 2
            center_width = width_pad // 2
            img_pad = np.zeros([height_pad, width_pad, 3], dtype=np.uint8)
            start_height = center_height - height // 2
            stop_height = center_height + height // 2
            start_width = center_width - width // 2
            stop_width = center_width + width // 2
            img_pad[start_height:stop_height,
                    start_width:stop_width, :] = truth_img
            truth_img = img_pad
            img = np.copy(img_pad)
            yolo_img = np.copy(img_pad)
            detections = yolo.detect(yolo_img)
        else:
            truth_img = cv2.imread(img_file)
            img = np.copy(truth_img)
            yolo_img = np.copy(truth_img)

            detections = yolo.detect(yolo_img)

        for detection in detections:

            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            print('cos:', cos)
            print('sin:', sin)
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]

            alpha -= np.pi
            my_alpha = alpha - np.pi / 2
            print('new cos', np.cos(my_alpha))
            print('new sin', np.sin(my_alpha))
            print('adding', angle_bins[argmax])
            print('confidence', conf)
            print('ANGLE', (my_alpha % (2 * np.pi)) / (2 * np.pi) * 360)
            print(theta_ray)

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
Example #16
 def __init__(self):
     super(Encoder, self).__init__()
     self.original_model = vgg.vgg19_bn(pretrained=True)
     self.convs = list(self.original_model.children())[0]
     self.layers = nn.Sequential(*list(self.convs)[:-1])
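A shape sketch for the encoder above (running it as-is downloads the pretrained weights): slicing off the last feature module drops the fifth max pool, so a 224x224 input leaves as a 512-channel, 14x14 map.

import torch

enc = Encoder()
with torch.no_grad():
    out = enc.layers(torch.randn(1, 3, 224, 224))
print(out.shape)  # expected: torch.Size([1, 512, 14, 14])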
Example #17
def main():
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    assert len(model_lst) > 0, 'No previous model found, please train first!'

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=False)
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # defaults to /eval
    dataset = Dataset(root + '/eval')
    averages = ClassAverages.ClassAverages()

    all_images = dataset.all_objects()
    for key in sorted(all_images.keys()):
        start_time = time.time()
        data = all_images[key]

        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detectedObject in objects:
            label = detectedObject.label
            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            dim += averages.get_item(label['Class'])

            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img, label['Box_2D'], dim, alpha, theta_ray)
            print('Truth pose: %s\nEstimated location: %s' % (label['Location'], location))  # x,y,z

            # plot car by car
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                cv2.imshow('3D-DeepBox', numpy_vertical)
                cv2.waitKey(0)

        print('Got %s poses in %.3f seconds\n' % (len(objects), time.time() - start_time))

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', numpy_vertical)
            if cv2.waitKey(0) == 27: return
Example #18
def main():

    # hyper parameters
    epochs = 100
    batch_size = 8
    alpha = 0.6
    w = 0.4

    train_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/training'
    dataset = Dataset(train_path)

    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}

    generator = data.DataLoader(dataset, **params)

    my_vgg = vgg.vgg19_bn(pretrained=True)
    model = Model(features=my_vgg.features).cuda()
    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    conf_loss_func = nn.CrossEntropyLoss().cuda()
    dim_loss_func = nn.MSELoss().cuda()
    orient_loss_func = OrientationLoss

    # load any previous weights
    model_path = os.path.abspath(
        os.path.dirname(__file__)) + '/weights/back_up/'
    latest_model = None
    first_epoch = 0
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    else:
        try:
            latest_model = [
                x for x in sorted(os.listdir(model_path)) if x.endswith('.pkl')
            ][-1]
        except IndexError:
            pass

    if latest_model is not None:
        checkpoint = torch.load(model_path + latest_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt_SGD.load_state_dict(checkpoint['optimizer_state_dict'])
        first_epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        print('Found previous checkpoint: %s at epoch %s' %
              (latest_model, first_epoch))
        print('Resuming training....')

    total_num_batches = int(len(dataset) / batch_size)

    for epoch in range(first_epoch + 1, epochs + 1):
        curr_batch = 0
        passes = 0
        print("Loading all detected objects in dataset...")
        for local_batch, local_labels in generator:

            truth_orient = local_labels['Orientation'].float().cuda()
            truth_conf = local_labels['Confidence'].long().cuda()
            truth_dim = local_labels['Dimensions'].float().cuda()

            local_batch = local_batch.float().cuda()
            [orient, conf, dim] = model(local_batch)

            orient_loss = orient_loss_func(orient, truth_orient, truth_conf)
            dim_loss = dim_loss_func(dim, truth_dim)

            truth_conf = torch.max(truth_conf, dim=1)[1]
            conf_loss = conf_loss_func(conf, truth_conf)

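            # total loss: alpha * dim_loss + (conf_loss + w * orient_loss),
            # i.e. the MultiBin objective with the alpha and w set above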
            loss_theta = conf_loss + w * orient_loss
            loss = alpha * dim_loss + loss_theta

            opt_SGD.zero_grad()
            loss.backward()
            opt_SGD.step()

            if passes % 10 == 0:
                print("--- epoch %s | batch %s/%s --- [loss: %s]" %
                      (epoch, curr_batch, total_num_batches, loss.item()))
                passes = 0

            passes += 1
            curr_batch += 1

        # save after every 10 epochs
        if epoch % 10 == 0:
            name = model_path + 'epoch_%s.pkl' % epoch
            print("====================")
            print("Done with epoch %s!" % epoch)
            print("Saving weights as %s ..." % name)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': opt_SGD.state_dict(),
                    'loss': loss
                }, name)
            print("====================")
            print('epoch', epoch)
            print('model_state_dict', model.state_dict())
            print('optimizer_state_dict', opt_SGD.state_dict())
            print('loss', loss)
Example #19
    epochs = config["epochs"]
    batches = config["batches"]
    bins = config["bins"]
    alpha = config["alpha"]
    w = config["w"]

    print("load train data!")
    print("load val data!")

    data = Dataset.ImageDataset(path + "/training")
    #print("data:")
    data = Dataset.BatchDataset(data, batches, bins)

    if len(model_list) == 0:
        print("No previous model found, start training!")
        my_vgg = vgg.vgg19_bn(pretrained=True)  # avoid shadowing the vgg module
        model = Model.Model(features=my_vgg.features, bins=bins).cuda()
    else:
        print("Found previous model %s" % model_list[-1])
        my_vgg = vgg.vgg19_bn(pretrained=False)
        model = Model.Model(features=my_vgg.features, bins=bins).cuda()
        param = torch.load(model_path + "/%s" % model_list[-1])
        model.load_state_dict(param)

    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    dim_LossFunc = nn.MSELoss().cuda()
    conf_LossFunc = nn.CrossEntropyLoss().cuda()
    #print("33333",float(data.num_of_patch)/batches)
    iter_each_time = round(float(data.num_of_patch) / batches)
    for epoch in range(epochs):
        for i in range(int(iter_each_time)):
Example #20
 def test_vgg19_bn(self):
     # VGG 19-layer model (configuration 'E') with batch normalization
     x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
     self.exportTest(toC(vgg19_bn()), toC(x))
Example #21
 def test_vgg19_bn(self):
     self.run_model_test(vgg19_bn(), train=False,
                         batch_size=BATCH_SIZE)
Example #22
def main():
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'
    cam = cv2.VideoCapture(0)
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    assert len(model_lst) > 0, 'No previous model found, please train first!'

    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=False)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo_path = root + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    FLAGS = parser.parse_args()
    cal_dir = FLAGS.cal_dir
    # using P_rect from global calibration file
    calib_path = root + '/' + cal_dir
    calib_file = calib_path + 'calib_cam_to_cam.txt'

    # using P from each frame
    # calib_path = root + '/Kitti/testing/calib/'

    while cv2.waitKey(5) != 27:
        # P for each frame
        # calib_file = calib_path + id + '.txt'

        ret, truth_img = cam.read()
        if not ret:
            continue
        start_time = time.time()
        img = truth_img.copy()
        yolo_img = truth_img.copy()
        detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # This is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)
            if not FLAGS.hide_debug:
                print('Estimated location: %s' % location)  # x,y,z

        if not FLAGS.hide_debug:
            print('Got %s poses in %.3f seconds\n' %
                  (len(detections), time.time() - start_time))

        if FLAGS.show_yolo:
            img = np.concatenate((truth_img, img), axis=0)
        cv2.imshow('3D-DeepBox', img)
Example #23
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision.models import vgg



if __name__ == '__main__':
    bins = 8
    w = 1
    alpha = 1
    data = Dataset.ImageDataset('../../Kitti/training')
    data = Dataset.BatchDataset(data, 8, bins)
    #'''
    #my_vgg = torch.load('model/vgg16.pkl').cuda()
    my_vgg = vgg.vgg19_bn(pretrained=True)  # avoid shadowing the vgg module
    #param = torch.load('model.pkl')
    model = Model.Model(features=my_vgg.features, bins=bins).cuda()
    #model.load_state_dict(param)

    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    dim_LossFunc = nn.MSELoss().cuda()
    conf_LossFunc = nn.CrossEntropyLoss().cuda()
    for epoch in range(25):
        for i in range(5000):
            batch, confidence, confidence_multi, ntheta, angleDiff, dimGT, LocalAngle, Ry, ThetaRay = data.Next()
            confidence_arg = np.argmax(confidence, axis=1)
            batch = Variable(torch.FloatTensor(batch), requires_grad=False).cuda()
            confidence = Variable(torch.LongTensor(confidence.astype(np.int64)), requires_grad=False).cuda()
            confidence_multi = Variable(torch.LongTensor(confidence_multi.astype(np.int64)), requires_grad=False).cuda()
            ntheta = Variable(torch.FloatTensor(ntheta), requires_grad=False).cuda()
Example #24
def convert_to_image(array, size=256):
    img = np.transpose(array, [1, 2, 0])
    img = img * 127.5 + 127.5
    img = np.clip(img, 0, 255).astype(np.uint8)
    return cv2.resize(img, (size, size))


label_placeholder = tf.placeholder(tf.float32, [None, 121])
synthesise = Gs.get_output_for(latent, label_placeholder)

weights_path = '../rotation_detector/weights'
model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]

print('Using previous model %s' % model_lst[-1])
my_vgg = vgg.vgg19_bn(pretrained=True).cpu()

model = Model.Model(features=my_vgg.features, bins=2).cpu()
checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                        map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# load yolo
yolo_path = '../rotation_detector/weights'
yolo = cv_Yolo(yolo_path)

averages = ClassAverages.ClassAverages()
angle_bins = generate_bins(2)

for angle in range(0, 360, 30):
Example #25
def main():

    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(os.path.dirname(__file__)) + "/" + image_dir
    # using P_rect from global calibration file
    calib_path = os.path.abspath(os.path.dirname(__file__)) + "/" + cal_dir
    # calib_file = calib_path + "calib_cam_to_cam.txt"

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except OSError:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:

        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        calib_file = calib_path + img_id + ".txt"

        #comp_img = np.array(Image.open(img_file).convert('RGB'))
        truth_img = cv2.imread(img_file)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(img_file)

        for detection in detections:

            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            #try:
            detectedObject = DetectedObject(img, detection.detected_class,
                                            detection.box_2d, calib_file)
            #except:
            #    continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', img_id + '_yolo.png'),
                        numpy_vertical)
            #cv2.imshow('SPACE for next image, any other key to exit', numpy_vertical)
        else:
            cv2.imwrite(os.path.join('output', img_id + '_3d.png'), img)
            #cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')
Example #26
def load_model(model_name, classes=1000, pretrained=True, in_channels=3):
    """Load the specified VGG architecture for ImageNet
  
    Args:
      model_name: VGG architecture type
      classes: number of predicted classes
      pretrained: load pretrained network on ImageNet
  """
    if pretrained:
        assert classes == 1000, "Pretrained models are provided only for ImageNet."

    kwargs = {'num_classes': classes}

    if model_name == 'vgg11':
        net = VGG.vgg11(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg13':
        net = VGG.vgg13(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg16':
        net = VGG.vgg16(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg19':
        net = VGG.vgg19(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg11bn':
        net = VGG.vgg11_bn(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg13bn':
        net = VGG.vgg13_bn(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg16bn':
        net = VGG.vgg16_bn(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg19bn':
        net = VGG.vgg19_bn(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'vgg19_orig':
        net = VGG.vgg19(pretrained=False, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
            net.features[0] = input_layer
        init_weights_vgg_orig(net)
    elif model_name == 'alexnet':
        net = AlexNet(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            input_layer = nn.Conv2d(in_channels,
                                    64,
                                    kernel_size=11,
                                    stride=4,
                                    padding=2)
            nn.init.kaiming_normal_(input_layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            input_layer.bias.data.zero_()
            net.features[0] = input_layer
    elif model_name == 'lenet':
        kwargs['in_channels'] = in_channels
        net = lenet(**kwargs)
    else:
        raise ValueError("Unsupported model architecture.")
    return net
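A hedged usage sketch (the argument values are assumptions): asking for a single-channel 'vgg19bn' exercises the input-layer replacement branch above.

# Grayscale input: a fresh 1-channel first conv layer replaces net.features[0].
net = load_model('vgg19bn', classes=1000, pretrained=True, in_channels=1)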
Example #27
    img[:, :, 2] = batch[0, 0, :, :]
    return img

if __name__ == '__main__':
    bins = 8
    w = 1
    alpha = 1
    path = '../../Kitti/training'
    kittiData = kitti.KITTIObjectsReader(path)
    #print kittiData.getFrameInfo(0)['calibration']
    #sys.exit()
    data = Dataset.ImageDataset(path)
    data = Dataset.BatchDataset(data, 1, bins, mode='eval')
    #print 'a'    
    param = torch.load('model.pkl')
    VGG = vgg.vgg19_bn(pretrained=False)
    model = Model(features=VGG.features, bins=bins).cuda()
    model.load_state_dict(param)
    model.eval()

    total = 0
    error_lst = []
    distance_lst = []
    for epoch in range(1):
        for i in range(5000):
            #data.idx = 10
            batch, centerAngle, info = data.EvalBatch()
            P = kittiData.getFrameInfo(info['Index'])['calibration']['projection_left']
            box_2D = info['Box_2D']
            dimGT = info['Dimension']
            angle = info['LocalAngle'] / np.pi * 180
Example #28
def main():

    # Defaults: cal_dir='camera_cal/', hide_debug=False, image_dir='eval/image_2/', show_yolo=False, video=False
    FLAGS = parser.parse_args()

    # Note: there are two weight files: yolov3.weights for the 2D YOLO detector,
    # and the self-trained weights (named epoch_10.pkl) that regress dimensions and alpha
    weights_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    model_lst = [
        x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')
    ]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])

        # vgg19_bn extracts the image features that feed the three branches below
        # TODO: switch to vgg16_bn?
        my_vgg = vgg.vgg19_bn(pretrained=True)

        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2)

        # run inference on the CPU
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1],
                                map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    yolo = cv_Yolo(yolo_path)

    # per-class dimension statistics gathered from the training set
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # path to the images to run detection on
    image_dir = FLAGS.image_dir

    # when all images share the same proj_matrix, it should be placed in this directory
    cal_dir = FLAGS.cal_dir

    # FLAGS.video defaults to False
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + image_dir
    # using P_rect from global calibration file
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + cal_dir
    # calib_file = calib_path + "calib_cam_to_cam.txt"

    # using P from each frame
    calib_path = os.path.abspath(os.path.dirname(
        __file__)) + os.path.sep + 'eval' + os.path.sep + 'calib' + os.path.sep

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:

        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        calib_file = calib_path + img_id + ".txt"

        truth_img = cv2.imread(img_file)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        # YOLO returns 2D pixel coordinates and a class for each detection
        detections = yolo.detect(yolo_img)

        for detection in detections:

            # The detected class must be one of the classes enumerated for the
            # KITTI dataset; YOLO defines more classes than KITTI, so it may
            # detect something that never appears in the KITTI class list.
            if not averages.recognized_class(detection.detected_class):
                print('class ' + detection.detected_class +
                      ' is not a KITTI class, ignoring it')
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # pass the image, detected class, 2D box, and this frame's
            # proj_matrix into DetectedObject's __init__()
            try:
                detectedObject = DetectedObject(img, detection.detected_class,
                                                detection.box_2d, calib_file)
            except Exception:
                print("YOLO detection error: invalid 2D box!")
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # get the predicted orient, conf, and dim
            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)
            # take the bin with the larger conf and use that bin's orient
            argmax = np.argmax(conf)
            orient = orient[argmax, :]

            # The predicted cos and sin correspond to cos(angle_diff) and
            # sin(angle_diff) from the training set, where angle_diff is the angle
            # between the true alpha (expanded to 0-2pi) and the matching bin.
            cos = orient[0]
            sin = orient[1]

            # np.arctan2 takes sin as the y coordinate and cos as the x coordinate
            # and returns an angle in radians in the range -pi to +pi; see
            # https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.arctan2.html
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi  # final alpha value

            # show the 2D detections; off by default
            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray,
                                                  truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d,
                                                  dim, alpha, theta_ray)

            if not FLAGS.hide_debug:  # FLAGS.hide_debug defaults to False

                # Output the location of each detection; to stay consistent with
                # KITTI, shift the y coordinate by half the height:
                location[1] += dim[0] / 2
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:  # FLAGS.show_yolo默认为False
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit',
                       numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print('Got %s detections in %.3f seconds' %
                  (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
Example #29
def main():

    store_path = os.path.abspath(os.path.dirname(__file__)) + '/models'
    model_lst = [x for x in sorted(os.listdir(store_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please check it')
        exit()
    else:
        print('Found previous model %s' % model_lst[-1])
        vgg = V.vgg19_bn(pretrained=False)
        model = Model.Model(features=vgg.features, bins=2).cuda()
        params = torch.load(store_path + '/%s'%model_lst[-1])
        model.load_state_dict(params)
        model.eval()

    dataset = Dataset(os.path.abspath(os.path.dirname(__file__)) + '/eval')

    bins = model.bins
    centerAngle = np.zeros(bins)
    interval = 2 * np.pi / bins
    for i in range(1, bins):
        centerAngle[i] = i*interval


    for data in dataset:
        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for obj in objects:
            label = obj.label
            theta_ray = obj.theta_ray
            batch = obj.img

            alpha = label['Alpha']
            dimensions = label['Dimensions']

            batch = Variable(torch.FloatTensor(batch), requires_grad=False).cuda()
            [orient, conf, dim] = model(batch)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            argmax = np.argmax(conf)

            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            theta = np.arctan2(sin, cos)
            theta = theta + centerAngle[argmax]

            print(theta)
            print(alpha)

            exit()

            plot_regressed_3d_bbox(img, truth_img, label['Box_2D'], dim, alpha, theta_ray, cam_to_img, label)

        numpy_vertical = np.concatenate((truth_img, img), axis=0)
        cv2.imshow('2D detection on top, 3D prediction on bottom', numpy_vertical)
        cv2.waitKey(0)
Example #30
def recordVGG(info):
    global SKIP
    import torchvision.models.vgg as vggGen

    if not (SKIP and 'vgg11' in info['name_list']):
        INFO("proceeding for VGG11...")
        net = vggGen.vgg11(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg11')
    else:
        INFO("Skip VGG11")

    if not (SKIP and 'vgg13' in info['name_list']):
        INFO("proceeding for VGG13...")
        net = vggGen.vgg13(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg13')
    else:
        INFO("Skip VGG13")

    if not (SKIP and 'vgg16' in info['name_list']):
        INFO("proceeding for VGG16...")
        net = vggGen.vgg16(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg16')
    else:
        INFO("Skip VGG16")

    if not (SKIP and 'vgg19' in info['name_list']):
        INFO("proceeding for VGG19...")
        net = vggGen.vgg19(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg19')
    else:
        INFO("Skip VGG19")

    if not (SKIP and 'vgg11_bn' in info['name_list']):
        INFO("proceeding for VGG11_bn...")
        net = vggGen.vgg11_bn(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg11_bn')
    else:
        INFO("Skip VGG11_bn")

    if not (SKIP and 'vgg13_bn' in info['name_list']):
        INFO("proceeding for VGG13_bn...")
        net = vggGen.vgg13_bn(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg13_bn')
    else:
        INFO("Skip VGG13_bn")

    if not (SKIP and 'vgg16_bn' in info['name_list']):
        INFO("proceeding for VGG16_bn...")
        net = vggGen.vgg16_bn(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg16_bn')
    else:
        INFO("Skip VGG16_bn")

    if not (SKIP and 'vgg19_bn' in info['name_list']):
        INFO("proceeding for VGG19_bn...")
        net = vggGen.vgg19_bn(pretrained=True).cuda()
        sum = __summary(net, [3, 224, 224], verbose=True)
        __writeInfoJSON(sum, 'vgg19_bn')
    else:
        INFO("Skip VGG19_bn")
Example #31
def main():

    # hyper parameters
    epochs = 100
    batch_size = 8  # number of samples per training batch
    alpha = 0.6
    w = 0.4

    print("Loading all detected objects in dataset...")

    # path to the training set; defaults to ./Kitti/training/
    train_path = os.path.abspath(
        os.path.dirname(__file__)
    ) + os.path.sep + 'Kitti' + os.path.sep + 'training' + os.path.sep

    # run Dataset's __init__
    dataset = Dataset(train_path)

    # shuffle=True shuffles the data; num_workers is the number of loader workers
    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}

    generator = data.DataLoader(dataset, **params)

    my_vgg = vgg.vgg19_bn(pretrained=True)
    model = Model(features=my_vgg.features)
    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    conf_loss_func = nn.CrossEntropyLoss()
    dim_loss_func = nn.MSELoss()

    # the orientation loss uses a custom loss function
    orient_loss_func = OrientationLoss

    # load any previous weights
    model_path = os.path.abspath(
        os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    latest_model = None
    first_epoch = 0
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    else:
        try:
            latest_model = [
                x for x in sorted(os.listdir(model_path)) if x.endswith('.pkl')
            ][-1]
        except IndexError:
            pass

    if latest_model is not None:
        checkpoint = torch.load(
            model_path + latest_model,
            map_location=torch.device('cpu'))  # load the epoch_10.pkl file
        model.load_state_dict(checkpoint['model_state_dict'])
        opt_SGD.load_state_dict(checkpoint['optimizer_state_dict'])
        first_epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        print('Found previous checkpoint: %s at epoch %s' %
              (latest_model, first_epoch))
        print('Resuming training....')

    total_num_batches = int(len(dataset) / batch_size)

    for epoch in range(first_epoch + 1, epochs + 1):
        curr_batch = 0
        passes = 0
        for local_batch, local_labels in generator:

            # Orientation holds the cos and sin of the difference between the
            # angle and the center of the bin it falls in; bins the angle does
            # not fall in get an orient of (0, 0)
            truth_orient = local_labels['Orientation'].float()

            # Confidence marks which bin the label's angle falls in; with two
            # bins, each row of the label's Confidence is a 1x2 matrix
            truth_conf = local_labels['Confidence'].long()

            # true dimensions from the label, with the class mean subtracted
            truth_dim = local_labels['Dimensions'].float()

            local_batch = local_batch.float()

            # feed the batch through the model to get predictions
            [orient, conf, dim] = model(local_batch)

            orient_loss = orient_loss_func(orient, truth_orient, truth_conf)
            dim_loss = dim_loss_func(dim, truth_dim)

            # index of the entry where truth_conf is 1
            truth_conf = torch.max(truth_conf, dim=1)[1]
            conf_loss = conf_loss_func(conf, truth_conf)

            loss_theta = conf_loss + w * orient_loss
            loss = alpha * dim_loss + loss_theta

            opt_SGD.zero_grad()
            loss.backward()
            opt_SGD.step()

            if passes % 10 == 0:
                print("--- epoch %s | batch %s/%s --- [loss: %s]" %
                      (epoch, curr_batch, total_num_batches, loss.item()))
                passes = 0

            passes += 1
            curr_batch += 1

        # save after every 10 epochs
        if epoch % 10 == 0:
            name = model_path + 'epoch_%s.pkl' % epoch
            print("====================")
            print("Done with epoch %s!" % epoch)
            print("Saving weights as %s ..." % name)
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': opt_SGD.state_dict(),
                    'loss': loss
                }, name)
            print("====================")
Example #32
 def test_vgg19_bn(self):
     self.run_model_test(vgg19_bn(), train=False,
                         batch_size=BATCH_SIZE)