Example #1
# Imports (module paths assume the ssd_keras project layout; adjust to your
# version of the repository):
import datetime
from math import ceil

from keras import backend as K
from keras.optimizers import SGD
from keras.callbacks import (EarlyStopping, ModelCheckpoint,
                             ReduceLROnPlateau, TensorBoard)

from keras_ssd512 import ssd_512
from keras_ssd_loss import SSDLoss
from ssd_batch_generator import BatchGenerator
from ssd_box_encode_decode_utils import SSDBoxEncoder


def train_hs512(lr=1e-4,
                freeze_bn=False,
                optim=None,
                batch_size=8,
                weights_path=None,
                save_weights_only=True,
                epochs=25):
    if weights_path is None:
        weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5'

    img_height = 512  # Height of the input images
    img_width = 512  # Width of the input images
    img_channels = 3  # Number of color channels of the input images
    # subtract_mean = [104, 117, 123]  # The per-channel mean of the Pascal VOC images
    subtract_mean = [138, 138, 138]  # The per-channel mean of the images in the dataset
    swap_channels = False
    n_classes = 20  # The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    scales = [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]  # MS COCO scales
    # scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]  # Pascal VOC scales
    aspect_ratios = [[1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                     [1.0, 2.0, 0.5],
                     [1.0, 2.0, 0.5]]  # The anchor box aspect ratios used in the original SSD512; the order matters
    two_boxes_for_ar1 = True
    # The space between two adjacent anchor box center points for each predictor layer.
    steps = [8, 16, 32, 64, 128, 256, 512]
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    limit_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
    variances = [0.1, 0.1, 0.2, 0.2]
    coords = 'centroids'
    normalize_coords = True

    # 1: Build the Keras model
    K.clear_session()
    model, pred_sizes = ssd_512(image_size=(img_height, img_width,
                                            img_channels),
                                n_classes=n_classes,
                                l2_regularization=0.0005,
                                scales=scales,
                                aspect_ratios_per_layer=aspect_ratios,
                                two_boxes_for_ar1=two_boxes_for_ar1,
                                steps=steps,
                                offsets=offsets,
                                limit_boxes=limit_boxes,
                                variances=variances,
                                coords=coords,
                                normalize_coords=normalize_coords,
                                subtract_mean=None,
                                divide_by_stddev=None,
                                swap_channels=swap_channels,
                                return_predictor_sizes=True)

    # 2: Load the trained SSD512 weights into the model.
    model.load_weights(weights_path, by_name=True)

    # 3: Instantiate the optimizer and the SSD loss function and compile the model

    if optim is None:
        optim = SGD(lr=lr, momentum=0.9)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

    # 4: Freeze the base network (the first 38 layers) if requested
    if freeze_bn:
        for layer in model.layers[:38]:
            layer.trainable = False

    # 5: compile model
    model.compile(optimizer=optim, loss=ssd_loss.compute_loss)

    ## Prepare data generation

    # 1: Instantiate two `BatchGenerator` objects: one for training, one for validation.

    train_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    val_dataset = BatchGenerator(
        box_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])

    # 2: Parse the image and label lists for the training and validation datasets. This can take a while.

    images_path_root = './Datasets/'
    train_combined_labels = './Datasets/train_combined_ssd_512.txt'
    val_labels = './Datasets/val_ssd_512.txt'

    train_dataset.parse_csv(images_dir=images_path_root,
                            labels_filename=train_combined_labels,
                            input_format=[
                                'image_name', 'class_id', 'xmin', 'xmax',
                                'ymin', 'ymax'
                            ])

    val_dataset.parse_csv(images_dir=images_path_root,
                          labels_filename=val_labels,
                          input_format=[
                              'image_name', 'class_id', 'xmin', 'xmax', 'ymin',
                              'ymax'
                          ])

    # 3: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function.

    ssd_box_encoder = SSDBoxEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=pred_sizes,
                                    min_scale=None,
                                    max_scale=None,
                                    scales=scales,
                                    aspect_ratios_global=None,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    pos_iou_threshold=0.5,
                                    neg_iou_threshold=0.2,
                                    coords=coords,
                                    normalize_coords=normalize_coords)

    # 4: Set the image processing / data augmentation options and create generator handles.

    train_generator = train_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=(0.5, 2, 0.5),
        flip=0.5,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        limit_boxes=True,
        # While the anchor boxes are not being clipped, the ground truth boxes should be
        include_thresh=0.4)

    val_generator = val_dataset.generate(
        batch_size=batch_size,
        shuffle=True,
        train=True,
        ssd_box_encoder=ssd_box_encoder,
        equalize=False,
        brightness=False,
        flip=False,
        translate=False,
        scale=False,
        max_crop_and_resize=(img_height, img_width, 1, 3),
        # This one is important because the Pascal VOC images vary in size
        random_pad_and_resize=(img_height, img_width, 1, 3, 0.5),
        # This one is important because the Pascal VOC images vary in size
        random_crop=False,
        crop=False,
        resize=False,
        gray=True,
        limit_boxes=True,
        include_thresh=0.4)

    # Get the number of samples in the training and validations datasets to compute the epoch lengths below.
    n_train_samples = train_dataset.get_n_samples()
    n_val_samples = val_dataset.get_n_samples()

    # 4: Run the training

    fingerprint = 'ssd{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())

    tbCallBack = TensorBoard(log_dir='./Graph/{}'.format(fingerprint),
                             histogram_freq=0,
                             batch_size=batch_size,
                             write_graph=True)

    checkpointer = ModelCheckpoint(
        './saved/{{val_loss:.2f}}__{}_best_weights.h5'.format(fingerprint),
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=save_weights_only,
        mode='auto',
        period=1)

    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                                patience=5,
                                                verbose=1,
                                                factor=0.5,
                                                min_lr=1e-6)

    stopper = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10)


    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=ceil(n_train_samples / batch_size),
        epochs=epochs,
        callbacks=[checkpointer, learning_rate_reduction, stopper, tbCallBack],
        validation_data=val_generator,
        validation_steps=ceil(n_val_samples / batch_size))

    # Return the training history so the caller can inspect the loss curves.
    return history
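A hypothetical invocation of the function above (all argument values are illustrative; the default weights file must be present locally):

# Hypothetical usage: fine-tune from the released VOC0712Plus SSD512 weights
# with the base network frozen.
history = train_hs512(lr=1e-4, batch_size=8, freeze_bn=True, epochs=25)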
Example #2
# This snippet picks up mid-script: in_size_detector, in_size_estimator,
# detector_path, estimator_path, num_conv_blocks, num_filters_start and
# num_dense_layers are assumed to be defined earlier, and ssd_512 and
# mpatacchiola_generic to be imported.
dense_layer_size = 512

# Normalization parameters.

mean = 0.408808
std = 0.237583

t_mean = -0.041212
t_std = 0.323931

p_mean = -0.000276
p_std = 0.540958

# Models.

head_detector = ssd_512(image_size=(in_size_detector, in_size_detector, 3), n_classes=1, min_scale=0.1, max_scale=1, mode='inference')
head_detector.load_weights(detector_path)

pose_estimator = mpatacchiola_generic(in_size_estimator, num_conv_blocks, num_filters_start, num_dense_layers, dense_layer_size)
pose_estimator.load_weights(estimator_path)

# Get video source.

video_source = input("Input stream: ")

# If the video source is a device, convert the string identifying it to an
# integer (OpenCV expects device indices as ints).

try:
    video_source = int(video_source)
except ValueError:
    pass
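After the conversion, the source can be handed straight to OpenCV; a minimal sketch of the capture loop (assumes `import cv2`; the processing step is a placeholder):

# Hypothetical continuation: open the stream and iterate over frames.
cap = cv2.VideoCapture(video_source)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # ... run head_detector / pose_estimator on `frame` here ...
cap.release()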
Example #3
def main():
    '''
    Testbench for clean_pointing04: runs the basic processing of the
    Pointing'04 dataset using the default values defined below.
    '''

    # Source paths.

    pointing04_dir = 'original/HeadPoseImageDatabase/'

    # Destination path.

    destination_dir = 'clean/pointing04/'

    # Detector model path.

    head_detector_path = 'models/head-detector.h5'

    # Detection parameters.

    in_size = 512
    out_size = 64
    confidence_threshold = 0.75

    # Output parameters.

    grayscale_output = True
    downscaling_interpolation = cv2.INTER_LINEAR

    # Number of splits for class assignment.

    num_splits_tilt = 8
    num_splits_pan = 8

    # Ratios for train/test and train/validation split.

    test_ratio = 0.2
    validation_ratio = 0.2

    # Detector model.

    detector = ssd_512(image_size=(in_size, in_size, 3),
                       n_classes=1,
                       min_scale=0.1,
                       max_scale=1,
                       mode='inference')
    detector.load_weights(head_detector_path)

    # Create the output directory, wiping any existing contents first.

    try:
        os.mkdir(destination_dir)
        print("Directory", destination_dir, "created.")
    except FileExistsError:
        print("Directory", destination_dir, "already exists.")
        shutil.rmtree(destination_dir)
        os.mkdir(destination_dir)

    # Actual cleaning.

    clean_pointing04(pointing04_dir, destination_dir, detector,
                     confidence_threshold, out_size, grayscale_output,
                     downscaling_interpolation)

    # Assign classes.

    class_assign(destination_dir, num_splits_tilt, num_splits_pan)

    # Split dataset.

    split_dataset(destination_dir, test_ratio, validation_ratio)

    # Get normalization parameters.

    find_norm_parameters(destination_dir)

    # OPTIONAL: Save dataset as numpy arrays (for uploading to Google Colab).

    store_dataset_arrays(destination_dir)
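The script presumably closes with the standard entry-point guard (not shown in this snippet):

if __name__ == '__main__':
    main()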
Example #4
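This snippet picks up mid-script: the names it passes to ssd_512 are assumed to be defined earlier. A plausible sketch of those definitions, mirroring the SSD512 settings used in the other examples (all values illustrative):

# Assumed earlier definitions (illustrative values only):
img_height, img_width, img_channels = 512, 512, 3
n_classes = 20
scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05]  # Pascal VOC scales
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
steps = [8, 16, 32, 64, 128, 256, 512]
offsets = [0.5] * 7
mean_color = [123, 117, 104]
swap_channels = [2, 1, 0]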
clip_boxes = False  # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2]  # The variances by which the encoded target coordinates are divided as in the original implementation
normalize_coords = True

# 1: Build the Keras model
K.clear_session()  # Clear previous models from memory.

model = ssd_512(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=True,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)
# TODO: Scales ?
# TODO: aspect_ratios_per_layer ?
# TODO: two_boxes_for_ar1 ?
# TODO: steps ?
# TODO: offsets ?
# TODO: clip_boxes ?
# TODO: variances ?
# TODO: normalize_coords ?
# TODO: subtract_mean ?
Example #5
# Imports (the ssd_512 module path assumes the ssd_keras project layout;
# image_converter, image_converter_pointcloud, publisher, talker, frameFilter
# and objRecord2Ros are assumed to be defined elsewhere in this package):
import math
import random
import time

import cv2
import numpy as np
import rospy
from keras import backend as K

from models.keras_ssd512 import ssd_512


def main():

    rospy.init_node('new_detect_pkg', anonymous=True)
    ic = image_converter()
    icp = image_converter_pointcloud()
    r = rospy.Rate(50)
    rospy.sleep(0.5)
    publisher()
    print('--------- initializing model, please wait ----------')
    # ssd_entity = SSD_entity()

    img_height = 512  # Height of the input images
    img_width = 512  # Width of the input images
    img_channels = 3  # Number of color channels of the input images

    # 1: Build the Keras model

    K.clear_session()  # Clear previous models from memory.

    model = ssd_512(
        image_size=(img_height, img_width, 3),
        n_classes=20,
        mode='inference',
        l2_regularization=0.0005,
        scales=[
            0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05
        ],  # The scales for MS COCO are [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9, 1.06]
        aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                 [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 128, 256, 512],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=0.5,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400)

    print("Model built.")

    # 2: Load the trained VOC0712Plus SSD512 weights into the model.

    # model.load_weights('/home/ogai1234/catkin_ws/src/detect_pkg/bin/ssd300_weights_epoch-45_loss-4.3010_val_loss-4.3788.h5', by_name=True)
    # model.load_weights('/home/ogai1234/catkin_ws/src/detect_pkg/bin/VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5', by_name=True)
    model.load_weights(
        '/home/ogai1234/catkin_ws/src/detect_pkg/bin/VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5',
        by_name=True)

    print('---------model done----------')
    # model.load_weights(weights_path, by_name=True)

    # print("Weights file loaded:", weights_path)

    # classes = ["background", "dog", "umbrellaman", "cone", "car", "bicycle", "person"]
    classes = [
        'background', 'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus',
        'Car', 'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse',
        'Motorbike', 'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train',
        'Tvmonitor'
    ]
    key = ''
    t0 = time.time()
    frame_code = 1
    linewidth = 3
    figsize = (10, 10)
    # initialize the filter cache
    global record_xmin, record_xmax, record_ymin, record_ymax, record_cls_id, record_obj_id
    global record_obj_num_appeared, total_objPerFrame

    record_obj_num_appeared = 0

    record_xmin = [[0] * 11 for _ in range(2)]
    record_xmax = [[0] * 11 for _ in range(2)]
    record_ymin = [[0] * 11 for _ in range(2)]
    record_ymax = [[0] * 11 for _ in range(2)]
    record_cls_id = [[0] * 11 for _ in range(2)]
    record_obj_id = [[0] * 11 for _ in range(2)]

    global objPerFrame
    objPerFrame = 0
    total_objPerFrame = 0
    font = cv2.FONT_HERSHEY_TRIPLEX

    while (key != 113) and (not rospy.is_shutdown()):
        t1 = time.time()

        # one frame starts here
        frame_code = frame_code + 1
        print("_________________________Frame:", frame_code)

        # for ros topic publish
        global objPerlastFrame, type_code_list, confidence_list, distance_list, x_list, y_list, z_list
        objPerlastFrame = total_objPerFrame
        # count the number of objects appearing in this frame
        objPerFrame = 0
        total_objPerFrame = 0

        # Initialize the ROS message contents
        type_code_list = [0] * 11
        confidence_list = [0] * 11
        distance_list = [0] * 11
        x_list = [0] * 11
        y_list = [0] * 11
        z_list = [0] * 11

        # load zed image from ros topic
        image = ic.zed_image  # image.shape = (720, 1280, 3)
        frame = image
        frame_old = frame
        # frame = frame[:,:,0:3]
        frame = cv2.resize(frame, (img_width, img_height))
        frame_np = np.array(frame)
        frame_np = frame_np[np.newaxis, :, :, :]

        # put frame into SSD network to do classification
        y_pred = model.predict(frame_np)

        confidence_threshold = 0.5
        y_pred_thresh = [
            y_pred[k][y_pred[k, :, 1] > confidence_threshold]
            for k in range(y_pred.shape[0])
        ]

        colors = dict()

        for box in y_pred_thresh[0]:
            if objPerFrame > 9:
                break
            cls_id = int(box[0])
            if cls_id not in colors:
                colors[cls_id] = (random.random(), random.random(),
                                  random.random())

            # for person and car/train/bus, collect its information and transfer to ROS
            if cls_id == 15:  #Person
                score = box[1]
                xmin = int(box[2] * frame_old.shape[1] / img_width)
                ymin = int(box[3] * frame_old.shape[0] / img_height)
                xmax = int(box[4] * frame_old.shape[1] / img_width)
                ymax = int(box[5] * frame_old.shape[0] / img_height)
                cv2.rectangle(frame_old, (xmin, ymin), (xmax, ymax),
                              color=colors[cls_id],
                              thickness=linewidth)
                # class_name = str(cls_id)
                # calculate distance and position
                x_center = round((xmin + xmax) / 2)
                y_center = round((ymin + ymax) / 2)
                icp.dict_1 = x_center
                icp.dict_2 = y_center
                rospy.sleep(0.04)
                point_cloud = icp.zed_image_pointcloud
                x, y, z = 0, 0, 0
                #x,y,z is the position obtained from pointcloud2
                for p in point_cloud:
                    x, y, z = p
                    break
                distance = math.sqrt(x * x + y * y + z * z)
                type_code = cls_id
                class_name = classes[cls_id]
                # Person's typecode is 1
                type_code = 1
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                cv2.putText(frame_old,
                            '{:s} | {:.2f} | {:.2f} |{:}'.format(
                                class_name, score, distance,
                                record_obj_id[1][total_objPerFrame]),
                            (xmin, ymin + 2),
                            font,
                            0.5, (255, 255, 255),
                            thickness=1)

            if cls_id == 7:  #Car
                score = box[1]
                xmin = int(box[2] * frame_old.shape[1] / img_width)
                ymin = int(box[3] * frame_old.shape[0] / img_height)
                xmax = int(box[4] * frame_old.shape[1] / img_width)
                ymax = int(box[5] * frame_old.shape[0] / img_height)
                cv2.rectangle(frame_old, (xmin, ymin), (xmax, ymax),
                              color=colors[cls_id],
                              thickness=linewidth)
                # class_name = str(cls_id)
                # calculate distance and position
                x_center = round((xmin + xmax) / 2)
                y_center = round((ymin + ymax) / 2)
                icp.dict_1 = x_center
                icp.dict_2 = y_center
                rospy.sleep(0.04)
                point_cloud = icp.zed_image_pointcloud
                x, y, z = 0, 0, 0
                #x,y,z is the position obtained from pointcloud2
                for p in point_cloud:
                    x, y, z = p
                    break
                distance = math.sqrt(x * x + y * y + z * z)
                type_code = cls_id
                class_name = classes[cls_id]
                # Car's typecode is 2
                type_code = 2
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                cv2.putText(frame_old,
                            '{:s} | {:.2f} |{:}'.format(
                                class_name, distance,
                                record_obj_id[1][total_objPerFrame]),
                            (xmin, ymin + 2),
                            font,
                            0.5, (255, 255, 255),
                            thickness=1)

            if cls_id == 6:  #Bus
                score = box[1]
                xmin = int(box[2] * frame_old.shape[1] / img_width)
                ymin = int(box[3] * frame_old.shape[0] / img_height)
                xmax = int(box[4] * frame_old.shape[1] / img_width)
                ymax = int(box[5] * frame_old.shape[0] / img_height)
                cv2.rectangle(frame_old, (xmin, ymin), (xmax, ymax),
                              color=colors[cls_id],
                              thickness=linewidth)
                # class_name = str(cls_id)
                # calculate distance and position
                x_center = round((xmin + xmax) / 2)
                y_center = round((ymin + ymax) / 2)
                icp.dict_1 = x_center
                icp.dict_2 = y_center
                rospy.sleep(0.04)
                point_cloud = icp.zed_image_pointcloud
                x, y, z = 0, 0, 0
                #x,y,z is the position obtained from pointcloud2
                for p in point_cloud:
                    x, y, z = p
                    break
                distance = math.sqrt(x * x + y * y + z * z)
                type_code = cls_id
                class_name = classes[cls_id]
                # Bus's typecode is 2
                type_code = 2
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                cv2.putText(frame_old,
                            '{:s} | {:.2f} |{:}'.format(
                                class_name, distance,
                                record_obj_id[1][total_objPerFrame]),
                            (xmin, ymin + 2),
                            font,
                            0.5, (255, 255, 255),
                            thickness=1)

            if cls_id == 19:  #Train
                score = box[1]
                xmin = int(box[2] * frame_old.shape[1] / img_width)
                ymin = int(box[3] * frame_old.shape[0] / img_height)
                xmax = int(box[4] * frame_old.shape[1] / img_width)
                ymax = int(box[5] * frame_old.shape[0] / img_height)
                cv2.rectangle(frame_old, (xmin, ymin), (xmax, ymax),
                              color=colors[cls_id],
                              thickness=linewidth)
                # class_name = str(cls_id)
                # calculate distance and position
                x_center = round((xmin + xmax) / 2)
                y_center = round((ymin + ymax) / 2)
                icp.dict_1 = x_center
                icp.dict_2 = y_center
                rospy.sleep(0.04)
                point_cloud = icp.zed_image_pointcloud
                x, y, z = 0, 0, 0
                #x,y,z is the position obtained from pointcloud2
                for p in point_cloud:
                    x, y, z = p
                    break
                distance = math.sqrt(x * x + y * y + z * z)
                type_code = cls_id
                class_name = classes[cls_id]
                # Train's typecode is 4
                type_code = 4
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                cv2.putText(frame_old,
                            '{:s} | {:.2f} |{:}'.format(
                                class_name, distance,
                                record_obj_id[1][total_objPerFrame]),
                            (xmin, ymin + 2),
                            font,
                            0.5, (255, 255, 255),
                            thickness=1)

            # To show all classes, uncomment the two lines below and comment
            # out the per-class ifs above:
            # objRecord2Ros(class_name, score, cls_id, type_code, distance, x, y, z)
            # cv2.putText(frame, '{:s} | {:.2f} |{:}'.format(class_name, distance, record_obj_id[1][total_objPerFrame]), (xmin, ymin+2), font, 0.5, (255, 255, 255), thickness=1)

            # draw the bounding box
            # print format: class|conf|distance|x|y

        # show the image with bounding box
        # image_back = cv2.resize(frame, (1080,720))
        cv2.imshow("image_back", frame_old)
        key = cv2.waitKey(1)
        t21 = time.time()
        # calculate fps
        print('fps {:f}'.format(1 / (t21 - t1)))
        talker()

        # move this frame's records into the last-frame slots and clear the current ones
        for i in range(10):
            # print('record_xmin[0][i]:',record_xmin[0][i+1])
            # print('record_xmin[1][i]:',record_xmin[1][i+1])
            record_xmin[0][i + 1] = record_xmin[1][i + 1]
            record_xmax[0][i + 1] = record_xmax[1][i + 1]
            record_ymin[0][i + 1] = record_ymin[1][i + 1]
            record_ymax[0][i + 1] = record_ymax[1][i + 1]
            record_cls_id[0][i + 1] = record_cls_id[1][i + 1]
            record_obj_id[0][i + 1] = record_obj_id[1][i + 1]
            record_xmin[1][i + 1] = 0
            record_xmax[1][i + 1] = 0
            record_ymin[1][i + 1] = 0
            record_ymax[1][i + 1] = 0
            record_cls_id[1][i + 1] = 0
            record_obj_id[1][i + 1] = 0
        if record_obj_num_appeared > 10000:
            record_obj_num_appeared = 0
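The four per-class branches above are identical apart from the class id and the ROS type code; a hedged refactoring sketch (handle_detection and TYPE_CODES are hypothetical names; the type codes are copied from the branches above):

# Hypothetical refactoring: one helper replaces the four nearly identical
# per-class blocks. TYPE_CODES maps a Pascal VOC class id to the ROS type
# code used by this node.
TYPE_CODES = {15: 1, 7: 2, 6: 2, 19: 4}  # Person, Car, Bus, Train

def handle_detection(box, frame_old, icp, colors, classes, font,
                     img_width, img_height, linewidth=3):
    global total_objPerFrame
    cls_id = int(box[0])
    if cls_id not in TYPE_CODES:
        return
    score = box[1]
    # Scale the box from network coordinates back to the original frame.
    xmin = int(box[2] * frame_old.shape[1] / img_width)
    ymin = int(box[3] * frame_old.shape[0] / img_height)
    xmax = int(box[4] * frame_old.shape[1] / img_width)
    ymax = int(box[5] * frame_old.shape[0] / img_height)
    cv2.rectangle(frame_old, (xmin, ymin), (xmax, ymax),
                  color=colors[cls_id], thickness=linewidth)
    # Query the point cloud at the box center for the 3D position.
    icp.dict_1 = round((xmin + xmax) / 2)
    icp.dict_2 = round((ymin + ymax) / 2)
    rospy.sleep(0.04)
    x, y, z = 0, 0, 0
    for p in icp.zed_image_pointcloud:
        x, y, z = p
        break
    distance = math.sqrt(x * x + y * y + z * z)
    class_name = classes[cls_id]
    total_objPerFrame = total_objPerFrame + 1
    if frameFilter(cls_id, xmin, xmax, ymin, ymax) == 1:
        objRecord2Ros(class_name, score, cls_id, TYPE_CODES[cls_id],
                      distance, x, y, z)
    cv2.putText(frame_old,
                '{:s} | {:.2f} | {:.2f} | {:}'.format(
                    class_name, score, distance,
                    record_obj_id[1][total_objPerFrame]),
                (xmin, ymin + 2), font, 0.5, (255, 255, 255), thickness=1)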
Example #6
# Imports (the ssd_512 module path assumes the ssd_keras project layout):
import pandas as pd
from scipy.io import loadmat

from models.keras_ssd512 import ssd_512

model_dir = 'models/'
model_name = 'head-detector'

matconvnet_model_path = model_dir + model_name + '.mat'
keras_model_path = model_dir + model_name + '.h5'

layer_mapping_path = model_dir + 'layers.csv'

# Model parameters.

img_height = 512
img_width = 512

# Model instance.

model = ssd_512(image_size=(img_height, img_width, 3), n_classes=1, min_scale=0.1, max_scale=1, mode='inference')

# Load weights.

model_ori = loadmat(matconvnet_model_path)

# Load layer mapping between MatConvNet and Keras models.

layers = pd.read_csv(layer_mapping_path)

# For each entry in the mapping, set its weights and biases from the original model:

for index, layer in layers.iterrows():

    print("Assigning layer " + layer['name'] + "...")