Example #1
def getGTLabels(myRel, GTMeta, cfg):
    labels = []
    for relID, relGT in GTMeta['rels'].items():
        prsIoU = utils.get_iou(myRel['prsBB'], relGT['prsBB'])
        objIoU = utils.get_iou(myRel['objBB'], relGT['objBB'])
        if prsIoU > cfg.minIoU and objIoU > cfg.minIoU:
            labels.append(relGT['label'])
    return labels
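Note: none of the examples on this page include get_iou itself, and its signature varies between projects. As a point of reference, here is a minimal sketch of an IoU helper compatible with the dict-style boxes used above and in Examples #10, #15, and #18 (keys xmin/ymin/xmax/ymax); the actual utils.get_iou may differ:

def get_iou(bb1, bb2):
    # Intersection rectangle of the two boxes
    ixmin = max(bb1['xmin'], bb2['xmin'])
    iymin = max(bb1['ymin'], bb2['ymin'])
    ixmax = min(bb1['xmax'], bb2['xmax'])
    iymax = min(bb1['ymax'], bb2['ymax'])
    if ixmax <= ixmin or iymax <= iymin:
        return 0.0  # no overlap
    inter = (ixmax - ixmin) * (iymax - iymin)
    area1 = (bb1['xmax'] - bb1['xmin']) * (bb1['ymax'] - bb1['ymin'])
    area2 = (bb2['xmax'] - bb2['xmin']) * (bb2['ymax'] - bb2['ymin'])
    return inter / float(area1 + area2 - inter)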
Example #2
def calculate_iou(m_name, gp_id, r_dict):
    checkpoint_path = checkpoint_folder + '/' + m_name
    criterion = nn.CrossEntropyLoss()
    model = load_model(checkpoint_path, num_labels, gp_id)
    average_iou = []
    correct = 0
    num_img = 0
    for folder_name in subfolders:
        label = int(folder_name.split('.')[0]) - 1
        target_tensor = torch.tensor([label])
        image_names = os.listdir(val_folder_path + '/' + folder_name)
        for img_name in image_names:
            img_path = val_folder_path + '/' + folder_name + '/' + img_name
            x1_gt, y1_gt, x2_gt, y2_gt = bbox.get_bbox_from_path(img_path)
            gt = [x1_gt, y1_gt, x2_gt, y2_gt]
            prediction, grad = predict(model, img_path, transform, criterion,
                                       target_tensor, height, width,
                                       num_channels, gp_id)
            pred = bounding_box_grad(grad)
            overlap = check_overlap(pred, gt)
            if overlap:
                iou = get_iou(pred, gt)
            else:
                iou = 0.0

            average_iou.append(iou)
            num_img += 1
            if prediction == label and iou >= 0.5:
                correct += 1

    # print(f'{m_name}: {np.mean(average_iou)}')
    loc_acc = float(correct) / float(num_img)
    avg_iou = np.mean(average_iou)
    r_dict[m_name] = [avg_iou, loc_acc]
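Examples #2, #5, and #6 instead pass boxes as [x1, y1, x2, y2] lists together with a separate check_overlap test. A sketch of helpers consistent with that usage (assumptions, not these projects' actual code):

def check_overlap(a, b):
    # a, b: [x1, y1, x2, y2]; True if the two boxes intersect
    return a[0] < b[2] and b[0] < a[2] and a[1] < b[3] and b[1] < a[3]

def get_iou(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    union = ((a[2] - a[0]) * (a[3] - a[1]) +
             (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union if union > 0 else 0.0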
Example #3
def cut_img(model, file, realoutput=None):
    fullpath = os.path.join(test_dir, file)
    print(fullpath)
    img_origin = cv.imdecode(np.fromfile(fullpath, dtype=np.uint8),
                             cv.IMREAD_COLOR)

    h, w, c = img_origin.shape  # OpenCV shape is (height, width, channels)
    img = cv.resize(img_origin, (im_size, im_size))
    img = img[..., ::-1]  # RGB
    img = transforms.ToPILImage()(img)
    img = transformer(img)

    with torch.no_grad():
        output = model(torch.unsqueeze(img, 0).to(device))
    output = output.reshape(4, -1)
    output = output.cpu().numpy()
    output = output * [w, h]  # scale x by width, y by height
    output = output.reshape(-1)
    output = sort_four_dot(output)

    img = draw_bboxes2(img_origin, output)
    cv.imwrite('{}_out.jpg'.format(test_dir + '/image/' + file), img)
    img2 = cut_and_adjust_img(img, output)
    cv.imwrite('{}_adjust.jpg'.format(test_dir + '/image/' + file), img2)

    if realoutput is not None:
        iou = get_iou(output, realoutput)
        print(iou)
        img0 = draw_bboxes2(img_origin, realoutput, 'g')
        cv.imwrite('{}_real.jpg'.format(test_dir + '/image/' + file), img0)
        return iou
Example #4
def add_features(df):
    df['iou'] = df.apply(lambda row: get_iou(row), axis=1)
    df['size1'] = df.apply(lambda row: (row.XMax1 - row.XMin1) *
                           (row.YMax1 - row.YMin1),
                           axis=1)
    df['size2'] = df.apply(lambda row: (row.XMax2 - row.XMin2) *
                           (row.YMax2 - row.YMin2),
                           axis=1)
    df['xcenter1'] = df.apply(lambda row: (row.XMax1 + row.XMin1) / 2, axis=1)
    df['xcenter2'] = df.apply(lambda row: (row.XMax2 + row.XMin2) / 2, axis=1)
    df['ycenter1'] = df.apply(lambda row: (row.YMax1 + row.YMin1) / 2, axis=1)
    df['ycenter2'] = df.apply(lambda row: (row.YMax2 + row.YMin2) / 2, axis=1)
    df['aspect1'] = df.apply(lambda row: (row.XMax1 - row.XMin1) /
                             (row.YMax1 - row.YMin1 + 1e-6),
                             axis=1)
    df['aspect2'] = df.apply(lambda row: (row.XMax2 - row.XMin2) /
                             (row.YMax2 - row.YMin2 + 1e-6),
                             axis=1)
    df['xcenterdiff'] = df.apply(lambda row: ((row.XMax1 + row.XMin1) -
                                              (row.XMax2 + row.XMin2)) / 2,
                                 axis=1)
    df['ycenterdiff'] = df.apply(lambda row: ((row.YMax1 + row.YMin1) -
                                              (row.YMax2 + row.YMin2)) / 2,
                                 axis=1)
    return df
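The row-wise df.apply calls above are easy to read but slow on large frames; every feature except the IoU (whose helper is not shown) can be computed column-wise. A vectorized equivalent, as a sketch:

def add_features_vectorized(df):
    # Same geometric features as above, computed on whole columns
    df['size1'] = (df.XMax1 - df.XMin1) * (df.YMax1 - df.YMin1)
    df['size2'] = (df.XMax2 - df.XMin2) * (df.YMax2 - df.YMin2)
    df['xcenter1'] = (df.XMax1 + df.XMin1) / 2
    df['xcenter2'] = (df.XMax2 + df.XMin2) / 2
    df['ycenter1'] = (df.YMax1 + df.YMin1) / 2
    df['ycenter2'] = (df.YMax2 + df.YMin2) / 2
    df['aspect1'] = (df.XMax1 - df.XMin1) / (df.YMax1 - df.YMin1 + 1e-6)
    df['aspect2'] = (df.XMax2 - df.XMin2) / (df.YMax2 - df.YMin2 + 1e-6)
    df['xcenterdiff'] = df.xcenter1 - df.xcenter2
    df['ycenterdiff'] = df.ycenter1 - df.ycenter2
    return df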
Example #5
def calculate_iou(m_name, gp_id):
    checkpoint_path = checkpoint_folder + '/' + m_name
    criterion = nn.CrossEntropyLoss()
    model = load_model(checkpoint_path, num_labels, gp_id)
    average_iou = []
    for folder_name in subfolders:
        label = int(folder_name.split('.')[0]) - 1
        target_tensor = torch.tensor([label])
        image_names = os.listdir(test_folder_path + '/' + folder_name)
        for img_name in image_names:
            img_path = test_folder_path + '/' + folder_name + '/' + img_name
            x1_gt, y1_gt, x2_gt, y2_gt = bbox.get_bbox_from_path(img_path)
            gt = [x1_gt, y1_gt, x2_gt, y2_gt]
            grad = predict(model, img_path, transform, criterion,
                           target_tensor, height, width, num_channels, gp_id)
            pred = bounding_box_grad(grad)
            overlap = check_overlap(pred, gt)
            if overlap:
                iou = get_iou(pred, gt)
            else:
                iou = 0.0

            average_iou.append(iou)

    return np.average(average_iou)
Example #6
def calculate_loc(m_name, gp_id):
    checkpoint_path = checkpoint_folder + '/' + m_name
    criterion = nn.CrossEntropyLoss()
    model = load_model(checkpoint_path, num_labels, gp_id)
    correct = 0
    total = 0
    for folder_name in subfolders:
        label = int(folder_name.split('.')[0]) - 1
        target_tensor = torch.tensor([label])
        image_names = os.listdir(test_folder_path + '/' + folder_name)
        for img_name in image_names:
            img_path = test_folder_path + '/' + folder_name + '/' + img_name
            x1_gt, y1_gt, x2_gt, y2_gt = bbox.get_bbox_from_path(img_path)
            gt = [x1_gt, y1_gt, x2_gt, y2_gt]
            prediction, grad = predict(model, img_path, transform, criterion,
                                       target_tensor, height, width,
                                       num_channels, gp_id)
            pred = bounding_box_grad(grad)
            overlap = check_overlap(pred, gt)
            if overlap:
                iou = get_iou(pred, gt)
            else:
                iou = 0.0

            total += 1
            if prediction == label and iou >= 0.5:
                correct += 1

    acc = float(correct) / float(total)
    return acc
Example #7
    def __call__(self, gt, pred):

        gt_egg, gt_pan = gt
        pred_egg, pred_pan = pred

        # Apply sigmoid and threshold by 0.5
        pred_egg = (torch.sigmoid(pred_egg) >= 0.5).type(pred_egg.dtype)
        pred_pan = (torch.sigmoid(pred_pan) >= 0.5).type(pred_pan.dtype)

        egg_iou = get_iou(gt_egg, pred_egg)
        pan_iou = get_iou(gt_pan, pred_pan)

        self.running_iou += (egg_iou.item() + pan_iou.item()) / 2
        self.running_samples += 1

        return self.running_iou / self.running_samples
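Here get_iou operates on thresholded segmentation masks rather than boxes. A minimal sketch of a mask-IoU helper consistent with this call site (an assumption; the project's helper is not shown):

import torch

def get_iou(gt_mask, pred_mask, eps=1e-6):
    # Binary-mask IoU: |A ∩ B| / |A ∪ B|, returned as a 0-dim tensor
    gt = gt_mask.bool()
    pred = pred_mask.bool()
    intersection = (gt & pred).sum().float()
    union = (gt | pred).sum().float()
    return intersection / (union + eps)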
Example #8
def train_model(model, optimizer, train_dataloader, val_dataloader, criterion, regression_criterion, num_epochs=1):
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_mIoU = 0.0
    
    loss_history = []
    iou_history = []
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_iou = 0.0
        
        for inputs, labels in train_dataloader:
            
            inputs = inputs.to(device)
            target_bbox = labels[:, :4].to(device)
            target_class = labels[:, 4].to(device)
            
            optimizer.zero_grad()

            pred_class, pred_bbox = model(inputs)
            classification_loss = criterion(pred_class.squeeze(), target_class)
            
            regression_loss = regression_criterion(pred_bbox, target_bbox).sqrt()
            loss = 0.5 * classification_loss + regression_loss
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
                    
        for inputs, labels in val_dataloader:
            
            inputs = inputs.to(device)
            target_bbox = labels[:, :4].to(device)
            target_class = labels[:, 4].to(device)
            
            with torch.no_grad():
                pred_class, pred_bbox = model(inputs)
            
            mIoU = get_iou(pred_bbox, target_bbox, inputs)
            
            running_iou += mIoU
            
        mean_loss = running_loss/len(train_dataloader)
        mIoU = running_iou/len(val_dataloader)

        loss_history.append(mean_loss)
        iou_history.append(mIoU)

        print(f'Epoch {epoch}/{num_epochs-1}. Loss: {mean_loss:.4f}. mIoU: {mIoU:.4f}')
        
        if mIoU > best_mIoU:
            best_mIoU = float(mIoU.cpu())
            best_model_wts = copy.deepcopy(model.state_dict())
            
    model.load_state_dict(best_model_wts)
    return model, loss_history, iou_history
Example #9
def calculate_yolo_metric(sess, X, pred, data_files, batch_size=100):
    count = 0
    iou_total = 0
    wrong_area = 0
    correct_g = 0
    correct_a = 0
    img_iter = single_np_datapoint_generator(data_files)
    for imgs, outs in group_iterable_into_list(img_iter, batch_size, 2):
        preds = sess.run(pred, feed_dict={X: imgs})
        for c in range(len(imgs)):
            out = outs[c]
            o = preds[c]
            orig = np.argwhere(out[:, :, 0] > 0.6)
            y_o, x_o, h_o, w_o, g_o, a_o = get_prediction(
                out, orig[0], give_prob=False)  # assuming only one face
            found = np.argwhere(o[:, :, 0] > 0.7)
            blist = []
            probs = []
            for i in found:
                y, x, h, w, g, a = get_prediction(o, i, give_prob=False)
                blist.append((x, y, w, h, g, a))
                probs.append(o[i[0], i[1], 0])
            filtered = non_max_suppress(blist, probs)
            max_iou = -1
            o_box = {'x1': x_o, 'x2': x_o + w_o, 'y1': y_o, 'y2': y_o + h_o}
            g_p, a_p = '', ''
            for x, y, w, h, g, a in filtered:
                iou = get_iou(o_box, {
                    'x1': x,
                    'x2': x + w,
                    'y1': y,
                    'y2': y + h
                })
                if iou > 0.2 and iou > max_iou:
                    if max_iou > 0:
                        wrong_area += 1.0 - max_iou
                    max_iou = iou
                    g_p = g
                    a_p = a
                else:
                    wrong_area += 1.0 - iou
            if max_iou > 0:
                iou_total += max_iou
                if g_o == g_p:
                    correct_g += 1.0
                if a_o == a_p:
                    correct_a += 1.0
            count += 1
        print("Processed", count, "images")
    return {
        "total_count": count,
        "average_iou": iou_total / count,
        "average_false_iou": wrong_area / count,
        "gender_accuracy": correct_g / count,
        "age_accuracy": correct_a / count
    }
Example #10
def drawOverlapRois(img, rois, imageMeta, imageDims, cfg, obj_mapping):
    import filters_helper as helper
    import utils
    f, spl = plt.subplots(1)
    spl.axis('off')
    spl.imshow(img)
    bboxes = []
    gta = helper.normalizeGTboxes(imageMeta['objects'],
                                  scale=imageDims['scale'],
                                  rpn_stride=cfg.rpn_stride)
    inv_obj_mapping = {x: key for key, x in obj_mapping.items()}
    for roi in rois:
        (xmin, ymin, width, height) = roi[0:4]
        label = int(roi[5])
        prop = roi[4]
        rt = {
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmin + width,
            'ymax': ymin + height
        }
        best_iou = 0.0
        for bbidx, gt in enumerate(gta):
            gt_label = obj_mapping[gt['label']]

            if label != gt_label:
                continue
            curr_iou = utils.get_iou(gt, rt)
            if curr_iou > best_iou:
                #                print(curr_iou)
                best_iou = curr_iou
        if best_iou >= 0.5:
            c = 'red'
            print('Pos. label:', inv_obj_mapping[label], prop, best_iou)
        elif best_iou >= 0:
            c = 'blue'
            # negatives are skipped; the debug print below is left unreachable
            continue
            print('Neg. label:', inv_obj_mapping[label], prop, best_iou)
        else:
            continue
        bb = {key: x * cfg.rpn_stride for key, x in rt.items()}
        bbox = np.copy(drawBoundingBox(bb))
        spl.plot(bbox[0, :], bbox[1, :], c=c)
        bboxes.append(bb)

    f.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.02)
Example #11
def gen_neg_img(img, boxes, nums=50):
    """
    产生negative图片
    :param img: 原图
    :param boxes: 人脸框, shape=(-1, 4)
    :param nums: 总需产生个数
    :return: tuple, 裁剪后的图片和标签
    """
    h, w, _ = img.shape

    have = 0
    while nums:
        # random crop
        size = npr.randint(12, max(min(h, w)//2, 13))
        x = npr.randint(0, w-size)
        y = npr.randint(0, h-size)

        iou = get_iou(np.array([x, y, size, size]), boxes)
        if np.max(iou) < 0.3:
            yield img[y:y+size, x:x+size], 0, [0.] * 4
            have += 1

        # # crop around the boxes
        # size = npr.randint(boxes[:, 2:]*0.7-1, boxes[:, 2:]*1.3)
        # size = np.maximum([12, 12], size)

        # point = npr.randint(boxes[:, :2]-size*0.3-1, boxes[:, :2]+size*0.3)
        # point = np.maximum([0, 0], point)

        # crop_box = np.hstack([point, size])

        # for box in crop_box:
        #     if box[0] + box[2] >= w or box[1] + box[3] >= h:
        #         continue
        #     iou = get_iou(box, boxes)
        #     if np.max(iou) >= 0.3:
        #         continue
        #     x, y, ww, hh = box
        #     yield img[y:y+hh, x:x+ww], 0, [0.] * 4
        #     have += 1
        
        nums -= 1

    return have
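Because gen_neg_img is a generator, its final `return have` travels in StopIteration.value; Example #24 below captures it with `yield from`. A standalone consumer would look like this sketch (img and boxes assumed to be defined as in the docstring):

gen = gen_neg_img(img, boxes, nums=50)
negatives = []
while True:
    try:
        negatives.append(next(gen))
    except StopIteration as stop:
        produced = stop.value  # number of negatives actually yielded
        break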
Example #12
    def get_pair(self):
        self.result_pair = {}

        for i, true_box in enumerate(self.true_result.all_box):
            max_iou = 0
            pair = -1
            for j, pre_box in enumerate(self.pre_result.connect_result):
                iou = get_iou(pre_box.bbox, true_box.bbox)
                if iou > max_iou:
                    max_iou = iou
                    pair = j

            if max_iou > 0.5:
                self.result_pair[i] = pair
            else:
                self.result_pair[i] = -1

        return self.result_pair
Example #13
def visual_img(model):
    with open(pickle_file, 'rb') as file:
        data = pickle.load(file)

    samples = random.sample(list(data), img_num)
    imgs = torch.zeros([img_num, 3, im_size, im_size], dtype=torch.float)
    ensure_folder('images')
    origin_pts = []
    for i in range(img_num):
        sample = samples[i]
        fullpath = sample['fullpath']
        raw = cv.imread(fullpath)
        raw = cv.resize(raw, (im_size, im_size))
        img = raw[..., ::-1]  # RGB
        img = transforms.ToPILImage()(img)
        img = transformer(img)
        imgs[i] = img

        cv.imwrite('images/{}_img.jpg'.format(i), raw)
        # print(sample['pts'])
        raw = draw_bboxes2(raw, sample['pts'], thick=3)
        origin_pts.append(sample['pts'])
        cv.imwrite('images/{}_true.jpg'.format(i), raw)

    with torch.no_grad():
        outputs = model(imgs.to(device))

    iou_sum = 0
    for i in range(img_num):
        output = outputs[i].cpu().numpy()
        output = output * im_size
        # print('output: ' + str(output))
        # print('output.shape: ' + str(output.shape))

        img = cv.imread('images/{}_img.jpg'.format(i))
        # print(output)
        img = draw_bboxes2(img, output, thick=3)
        iou_sum += get_iou(origin_pts[i], output)
        cv.imwrite('images/{}_out.jpg'.format(i), img)
    return iou_sum / img_num
Example #14
def gen_hard(net, img, boxes):
    if net not in DETECTORS:
        DETECTORS[net] = Detector(0.5, 0.5, net=net)
    detector = DETECTORS[net]
    bbox, _ = detector.predict(img, 20, net=net)
    crop_nums = [0] * 3
    for box in bbox:
        crop_img = img[box[1]: box[3], box[0]: box[2]]

        box[2] -= box[0]
        box[3] -= box[1]

        iou = get_iou(box, boxes)

        max_ind = np.argmax(iou)

        if iou[max_ind] < 0.3:
            if crop_nums[0] < 50:
                yield crop_img, 0, [0.] * 4
                crop_nums[0] += 1
            continue
        if iou[max_ind] < 0.4:
            continue

        true_box = boxes[max_ind]
        x1, y1, w, h = true_box
        
        offset_x1 = (x1 - box[0]) / float(box[2])
        offset_y1 = (y1 - box[1]) / float(box[3])

        offset_x2 = (x1+w-box[0]-box[2]) / float(box[2])
        offset_y2 = (y1+h-box[1]-box[3]) / float(box[3])

        if iou[max_ind] >= 0.65:
            yield crop_img, 1, (offset_x1, offset_y1, offset_x2, offset_y2)
            crop_nums[1] += 1

        else:
            yield crop_img, -1, (offset_x1, offset_y1, offset_x2, offset_y2)
            crop_nums[2] += 1
Example #15
def drawOverlapAnchors(img, anchors, imageMeta, imageDims, cfg):
    import filters_helper as helper
    import utils
    f, spl = plt.subplots(1)
    spl.axis('off')
    spl.imshow(img)
    bboxes = []
    gta = helper.normalizeGTboxes(imageMeta['objects'],
                                  scale=imageDims['scale'],
                                  rpn_stride=cfg.rpn_stride)

    for anchor in anchors:
        (xmin, ymin, width, height) = anchor[0:4]
        rt = {
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmin + width,
            'ymax': ymin + height
        }
        best_iou = 0.0
        for bbidx, gt in enumerate(gta):
            curr_iou = utils.get_iou(gt, rt)
            if curr_iou > best_iou:
                #                print(curr_iou)
                best_iou = curr_iou
        if best_iou >= 0.5:
            c = 'red'
        else:
            c = 'blue'
            continue
        bb = {key: x * cfg.rpn_stride for key, x in rt.items()}
        bbox = drawBoundingBox(bb)
        spl.plot(bbox[0, :], bbox[1, :], c=c)
        bboxes.append(bb)

    f.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.02)
Example #16
                # -----------------
                inputs, labels = Variable(inputs,
                                          requires_grad=True), Variable(labels)
                if gpu_id >= 0:
                    inputs, labels = inputs.cuda(), labels.cuda()

                with torch.no_grad():
                    outputs = net.forward(inputs)

                predictions = torch.max(outputs, 1)[1]

                loss = criterion(outputs,
                                 labels,
                                 size_average=False,
                                 batch_average=True)
                running_loss_val += loss.item()

                total_iou += utils.get_iou(predictions, labels)

                # Print loss and MIoU
                if ii % num_img_val == num_img_val - 1:
                    miou = total_iou / (ii * valBatch + inputs.data.shape[0])
                    running_loss_val = running_loss_val / num_img_val

                    print('Validation')
                    print('[Epoch: %d, numImages: %5d]' %
                          (epoch, ii * valBatch + inputs.data.shape[0]))
                    print('Loss: %f' % running_loss_val)
                    print('MIoU: %f\n' % miou)
                    running_loss_val = 0
Example #17
def createTargets(bboxes, imageMeta, imageDims, class_mapping, cfg):
    #out: rois [{1}, {...}, (1,ymin,xmin,ymax,xmax)]
    #out: labels [{1}, {...}, {nb_object_classes}]
    #out: deltas [{1}, {...}, (dx,dy,dw,dh) * (nb_object_classes-1)]

    #############################
    ########## Image ############
    #############################
    gt_bboxes = imageMeta['objects']

    scale = imageDims['scale']
    #    shape = imageDims['shape']

    #############################
    ###### Set Parameters #######
    #############################
    rpn_stride = cfg.rpn_stride
    detection_max_overlap = cfg.detection_max_overlap
    detection_min_overlap = cfg.detection_min_overlap

    #############################
    #### Initialize matrices ####
    #############################
    x_roi = []
    y_class_num = []
    y_class_regr_coords = []
    y_class_regr_label = []
    IoUs = []  # for debugging only

    #############################
    ##### Ground truth boxes ####
    #############################
    gta = helper.normalizeGTboxes(gt_bboxes,
                                  scale=scale,
                                  rpn_stride=rpn_stride)

    #############################
    #### Ground truth objects ###
    #############################
    for ix in range(bboxes.shape[0]):
        (xmin, ymin, width, height, prop) = bboxes[ix, :5]
        #        xmin = int(round(xmin))
        #        ymin = int(round(ymin))
        #        xmax = int(round(xmax))
        #        ymax = int(round(ymax))

        rt = {
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmin + width,
            'ymax': ymin + height
        }

        best_iou = 0.0
        best_bbox = -1
        for bbidx, gt in enumerate(gta):
            curr_iou = utils.get_iou(gt, rt)
            if curr_iou > best_iou:
                #                print(curr_iou)
                best_iou = curr_iou
                best_bbox = bbidx

        if best_iou < detection_min_overlap:
            continue
        else:
            x_roi.append([xmin, ymin, width, height, prop])
            IoUs.append(best_iou)

            if detection_min_overlap <= best_iou < detection_max_overlap:
                # hard negative example
                cls_name = 'bg'
            elif detection_max_overlap <= best_iou:
                cls_name = gt_bboxes[best_bbox]['label']
                tx, ty, tw, th = helper.get_GT_deltas(gta[best_bbox], rt)
#                bxmin, bymin, bw, bh = helper.apply_regr([xmin,ymin,width,height], [tx,ty,tw,th])
#                print(best_iou)
#                print('rt',rt['xmin'], rt['ymin'], rt['xmax'], rt['ymax'])
#                print('gt',gta[best_bbox]['xmin'], gta[best_bbox]['ymin'], gta[best_bbox]['xmax'], gta[best_bbox]['ymax'])
#                print('bb',bxmin, bymin, bxmin + bw, bymin + bh)
            else:
                print('roi = {}'.format(best_iou))
                raise RuntimeError

        # Classification ground truth
        class_num = class_mapping[cls_name]
        class_label = len(class_mapping) * [0]
        class_label[class_num] = 1
        y_class_num.append(copy.deepcopy(class_label))
        # Regression ground truth
        coords = [0] * 4 * (len(class_mapping) - 1)
        labels = [0] * 4 * (len(class_mapping) - 1)
        if cls_name != 'bg':
            label_pos = 4 * (class_num - 1)
            sx, sy, sw, sh = cfg.det_regr_std
            coords[label_pos:4 +
                   label_pos] = [tx * sx, ty * sy, tw * sw, th * sh]
            labels[label_pos:4 + label_pos] = [1, 1, 1, 1]
            y_class_regr_coords.append(copy.deepcopy(coords))
            y_class_regr_label.append(copy.deepcopy(labels))
        else:
            y_class_regr_coords.append(copy.deepcopy(coords))
            y_class_regr_label.append(copy.deepcopy(labels))

    if len(x_roi) == 0:
        #        print('x roi none')
        return None, None, None, None

    rois = np.array(x_roi)
    y_class_regr_label = np.array(y_class_regr_label)
    y_class_regr_coords = np.array(y_class_regr_coords)

    true_labels = np.array(y_class_num)
    true_boxes = np.concatenate([y_class_regr_label, y_class_regr_coords],
                                axis=1)

    return np.expand_dims(rois, axis=0), np.expand_dims(
        true_labels, axis=0), np.expand_dims(true_boxes, axis=0), IoUs
Example #18
def getBoundingBoxes(imagesMeta, objects, labels):
    newImagesMeta = {}
    imagesBadOnes = {}
    noImage = 0
    total = 0

    for imageID, imageMeta in imagesMeta.items():
        try:
            root = ET.parse(url + 'bbox/' + imageID + '.xml').getroot()
        except FileNotFoundError as e:
            print("missing", imageID)
            continue
        relsObj = imageMeta['rels']
        relsTmp = []
        relsObjBad = []
        relsPrsBad = []
        persons = []

        # Add objects
        for elem in root:
            if elem.tag != "object":
                continue

            # BB name
            objID = elem.find('name').text
            if objID not in objects.keys():
                continue
            objName = objects[objID]

            ## BB coordinates
            bbXML = elem.find('bndbox')
            xmin = int(bbXML.find('xmin').text)
            xmax = int(bbXML.find('xmax').text)
            ymin = int(bbXML.find('ymin').text)
            ymax = int(bbXML.find('ymax').text)
            bb = {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax}

            if objName == 'person':
                persons.append(bb)
            else:
                #Meta relation
                if objName in relsObj.keys():
                    pred = relsObj[objName]
                    label = labels[pred + objName]
                    relsTmp.append({
                        'labels': [label],
                        'names': [{
                            'pred': pred,
                            'obj': objName
                        }],
                        'objBB': bb
                    })
                else:
                    # 'pred' is only set for known relations, so record an
                    # empty predicate for unknown objects
                    relsObjBad.append({
                        'pred': '',
                        'name': objName,
                        'bb': bb
                    })
                total += 1

        # Choose best person boxes
        bestPrs = np.array([[0.0, 0.0, None] for i in range(len(relsTmp))])
        prsIdx = 0
        for perBB in persons:
            IoUs = np.zeros([len(relsTmp), 2])
            relIdx = 0
            for rel in relsTmp:
                objBB = rel['objBB']
                IoUPsy = utils.get_iou(objBB, perBB, False)
                IoU = utils.get_iou(objBB, perBB)
                IoUs[relIdx, :] = [IoUPsy, IoU]
                relIdx += 1
            bestIdx = np.argmax(IoUs[:, 0])
            if IoUs[bestIdx,
                    0] > bestPrs[bestIdx, 0] or IoUs[bestIdx, 0] == bestPrs[
                        bestIdx, 0] and IoUs[bestIdx, 1] > bestPrs[bestIdx, 1]:
                bestPrs[bestIdx, :] = np.array(
                    [IoUs[bestIdx, 0], IoUs[bestIdx, 1], prsIdx])
            prsIdx += 1

        # Add best persons
        relsFinal = {}
        relIdx = 0
        objGood = False
        perGood = True
        for [bestIoUPsy, _, prsIdx] in bestPrs:
            if bestIoUPsy > 0.1:
                relTmp = relsTmp[relIdx]
                relTmp['prsBB'] = persons[int(prsIdx)]
                relTmp['prsID'] = int(prsIdx)
                relsFinal[relIdx] = relTmp
            else:
                perGood = False
            relIdx += 1
            objGood = True

        # Add bad persons
        bestPrsIdx = bestPrs[:, 2]
        bestPrsIdx = bestPrsIdx[bestPrsIdx != np.array(None)]
        bestPrsIdx = bestPrsIdx.astype(int)
        for i in range(len(persons)):
            if i not in bestPrsIdx:
                relsPrsBad.append({
                    'pred': '',
                    'name': 'person',
                    'bb': persons[i]
                })

        if not objGood or not perGood:
            continue

        imageMeta['rels'] = relsFinal
        newImagesMeta[imageID] = imageMeta
        imagesBadOnes[imageID] = [relsObjBad, relsPrsBad]
        noImage += 1
    #print(badOnes)
    print(total)
    return newImagesMeta, imagesBadOnes
Example #19
def detect_pedrestrian(img,
                       pedestrians_bounding_boxes,
                       sliding_window_parameters,
                       classifier_svm,
                       grayscale=False,
                       must_normalize=True):
    """A partir de la imagen pasada por parametro se realiza una
    ventana deslizante y se dibujan las areas donde fue detectada una persona"""
    height, width = len(img), len(img[1])
    # block_heigth, block_width = SLIDING_WINDOW_SIZE
    block_width, block_heigth = sliding_window_parameters
    y = 0
    plt.imshow(img, cmap='gray')
    hogs_to_hard_mining = []
    # stride_y, stride_x = SLIDING_WINDOW_STRIDE
    stride_y, stride_x = int(SLIDING_WINDOW_STRIDE[0] / 2), int(
        SLIDING_WINDOW_STRIDE[1] / 2)
    # Precision, recall, etc. bookkeeping
    total_pedestrian = len(pedestrians_bounding_boxes)
    pedrestrian_predected = pedrestrian_success = 0

    # Convert to grayscale
    if grayscale:
        img = grayscaled_img(
            img)  # HOGs can only be computed on grayscale images

    # Normalize
    if must_normalize:
        img = normalize_img(img)

    # Start running the sliding window
    while y < height:
        x = 0
        while x < width:
            try:
                # sub_img = img[y:y + block_heigth, x:x + block_width, :]  # Take a subregion/sub-image
                sub_img = img[
                    y:y + block_heigth,
                    x:x + block_width, :]  # Take a subregion/sub-image
            except IndexError:
                # Some images without RGB channels may raise this error
                # sub_img = img[y:y + block_heigth, x:x + block_width]
                sub_img = img[y:y + block_heigth, x:x + block_width]
            finally:
                sub_img = resize(sub_img)

            sub_img_hog = hog(sub_img,
                              block_norm='L2-Hys',
                              transform_sqrt=True)
            predictions = classifier_svm.predict([sub_img_hog])

            if SHOW_IMG and DRAW_SLIDING_WINDOW:
                draw_rectangle(x, y, block_width, block_heigth, 'yellow')

            # Look for the false positives!
            if predictions[0] == 1:
                pedrestrian_predected += 1

                img_box = [x, y, x + block_width, y + block_heigth]
                # Check whether it has a worthwhile IoU with any declared pedestrian bounding box
                must_be_added = False
                intersects_with_pedestrian = False
                for pedestrian_bounding_box in pedestrians_bounding_boxes:
                    # Draw the bounding boxes if desired
                    if SHOW_IMG and DRAW_PEDRESTRIAN_BOUNDING_BOX:
                        draw_rectangle(pedestrian_bounding_box[0],
                                       pedestrian_bounding_box[1],
                                       pedestrian_bounding_box[2],
                                       pedestrian_bounding_box[3])

                    box_to_iou = [
                        pedestrian_bounding_box[0], pedestrian_bounding_box[1],
                        pedestrian_bounding_box[0] +
                        pedestrian_bounding_box[2],
                        pedestrian_bounding_box[1] + pedestrian_bounding_box[3]
                    ]
                    # Compute the IoU
                    iou = utils.get_iou(img_box, box_to_iou)
                    # print("iou", iou)

                    # If it is below the configured IoU limit, consider it for hard negative mining (HNM)
                    if iou < IOU_limit:
                        must_be_added = True
                    else:
                        # Draw correctly detected pedestrians in green
                        # draw_rectangle(x, y, block_width, block_heigth, '#008744')
                        draw_rectangle(x, y, block_width, block_heigth, 'blue')
                        intersects_with_pedestrian = True
                if not intersects_with_pedestrian and must_be_added:
                    # If it is not on top of a person and is a false
                    # positive, draw it in red ('#d62d20') on the image
                    # and keep it for hard negative mining
                    draw_rectangle(x, y, block_width, block_heigth, '#d62d20')
                    hogs_to_hard_mining.append(
                        sub_img_hog
                    )  # Store it for hard negative mining
                else:
                    pedrestrian_success += 1
            x += stride_x
        y += stride_y

    if SHOW_IMG:
        plt.title(
            "Bounding boxes in black. Sliding window in yellow. False positives in red. True positives in green."
        )
        plt.show()  # Show the image

    return hogs_to_hard_mining, total_pedestrian, pedrestrian_predected, pedrestrian_success
Example #20
def matching_cascade(tracks,
                     detections,
                     kalman_filter,
                     label_index,
                     age=3,
                     init_age=3,
                     gating_threshold=9.4877,
                     iou_threshold=0.3):
    """matching cascade
    tracking list: [1, 2, ..., N]
    detection list: [1, 2, ..., M]
    1. matching by maha_distance.
    2. if it is tentative, the max age is 3.
       if age > 3, just delete this tracker.
    3. if it has been matched, the max age is 30.
       if it has been matched, the age is set to 0.
       and we need to update the location of bounding boxes that have been matched.
       if it has not been matched, the age is added 1.
    4. How to match targets?
                                                           predict by linear model
                            tracking list(previous frame) -----------------------> tracking list(current frame)
                                                                                                |
                                                                                                |
                                                            if min(distance) < 9.4877           V
       update target by detection(measurement) that matched <-----------------------   compute maha distance

    Args:
        tracks:             a list of trackers    [x, y, a, h]
        detections:         a list of detections  [x, y, a, h]
        kalman_filter:      KalmanFilter object
        label_index:        the label that is monotonically increased
        age:                the max age of confirmed trackers
        init_age:           the max age of tentative(unconfirmed) trackers
        gating_threshold:   the threshold of maha_distance
        iou_threshold:      iou threshold of iou matching

    Returns:
        new_tracks

    """
    num_trackers = len(tracks)

    delete_index = []

    # starting tracking
    for i in range(num_trackers):
        tracker = tracks[i]

        # the last frame optimal estimation
        mean = tracker.mean
        cov = tracker.cov
        measure = tracker.measurement

        # predict the current estimation by transformation matrix
        mean_pred, cov_pred = kalman_filter.predict(mean, cov)
        tracker.update(mean_pred, cov_pred, measure)

        # age = age + 1
        tracker.predict()

        if len(detections) > 0:
            if tracker.tentative and tracker.age <= init_age:
                maha_distances = kalman_filter.maha_distance(
                    mean_pred, cov_pred, detections)
                min_distance = np.min(maha_distances)
                min_arg = np.argmin(maha_distances)
                if min_distance <= gating_threshold:
                    # 1.set tracker.tentative = False and age = 0
                    # 2.update distribution and measurement
                    # 3.delete this detection in detections
                    # 4.label this target
                    tracker.matching()
                    # update prediction results by kalman filter
                    new_mean, new_cov = kalman_filter.update(
                        mean_pred, cov_pred, detections[min_arg])
                    tracker.update(new_mean, new_cov, detections[min_arg])
                    detections.pop(min_arg)
                    # set label
                    label_index += 1
                    tracker.label(label_index)

            elif (not tracker.tentative) and tracker.age <= age:
                maha_distances = kalman_filter.maha_distance(
                    mean_pred, cov_pred, detections)
                min_distance = np.min(maha_distances)
                min_arg = np.argmin(maha_distances)
                if min_distance <= gating_threshold:
                    # 1.set tracker.tentative = False and age = 0
                    # 2.update distribution and measurement
                    # 3.delete this detection in detections
                    tracker.matching()
                    # update prediction results by kalman filter
                    new_mean, new_cov = kalman_filter.update(
                        mean_pred, cov_pred, detections[min_arg])
                    tracker.update(new_mean, new_cov, detections[min_arg])
                    detections.pop(min_arg)

        if tracker.tentative and tracker.age > init_age:
            delete_index.append(i)

        if (not tracker.tentative) and tracker.age > age:
            delete_index.append(i)

    # delete trackers
    new_tracks = []
    delete_set = set(delete_index)
    total_set = set(np.arange(num_trackers))
    remain_set = total_set - delete_set
    for k in remain_set:
        new_tracks.append(tracks[k])

    # IOU association on the set of unconfirmed and unmatched tracks of age n = 1
    for j, tracker in enumerate(new_tracks):
        tentative = tracker.tentative
        tracker_age = tracker.age  # avoid shadowing the `age` parameter
        mean_ = tracker.mean
        cov_ = tracker.cov

        if tentative and tracker_age == 1:
            if len(detections) > 0:
                tracker_measure = tracker.measurement
                ious = get_iou(tracker_measure, detections)
                max_iou = np.max(ious)
                max_arg = np.argmax(ious)
                if max_iou >= iou_threshold:
                    tracker.matching()
                    # update prediction results by kalman filter
                    new_mean, new_cov = kalman_filter.update(
                        mean_, cov_, detections[max_arg])
                    tracker.update(new_mean, new_cov, detections[max_arg])
                    detections.pop(max_arg)
                    # set label
                    label_index += 1
                    tracker.label(label_index)

    # initialize unmatched detections
    if len(detections) > 0:
        for t, detection in enumerate(detections):
            mean_init, cov_init = kalman_filter.initiate(detection)
            new_tracker = create_tracker(mean_init, cov_init, detection)
            new_tracks.append(new_tracker)

    return new_tracks, label_index
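A hypothetical driver loop for the cascade above; KalmanFilter, create_tracker, and the shape of video_detections are assumptions based on the docstring, not code shown in this example:

kf = KalmanFilter()  # assumed to provide predict/update/maha_distance/initiate
tracks, label_index = [], 0
for frame_detections in video_detections:  # each frame: list of [x, y, a, h]
    # matching_cascade pops matched detections, so pass a copy per frame
    tracks, label_index = matching_cascade(tracks, list(frame_detections),
                                           kf, label_index)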
Example #21
def createTargets(imageMeta, imageDims, cfg):
    #in: imageMeta
    #out: non-reduced targets

    #############################
    ########## Image ############
    #############################
    bboxes = imageMeta['objects']
    scale = imageDims['scale']
    reduced_shape = imageDims['redux_shape']
    image_height = reduced_shape[0]
    image_width = reduced_shape[1]

    #############################
    ###### Set Parameters #######
    #############################
    rpn_stride = cfg.rpn_stride

    output_width = int(image_width / rpn_stride)
    output_height = int(image_height / rpn_stride)

    anchor_sizes = cfg.anchor_sizes
    anchor_ratios = cfg.anchor_ratios

    num_anchors = len(anchor_sizes) * len(anchor_ratios)

    rpn_min_overlap = cfg.rpn_min_overlap
    rpn_max_overlap = cfg.rpn_max_overlap

    #############################
    #### Initialize matrices ####
    #############################
    y_rpn_overlap = np.zeros((output_height, output_width, num_anchors))
    y_is_box_valid = np.zeros((output_height, output_width, num_anchors))
    y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4))
    y_rpn_ancs = np.zeros((output_height, output_width, num_anchors * 4))

    num_bboxes = len(bboxes)

    num_anchors_for_gtbox = np.zeros(num_bboxes).astype(int)
    best_anchor_for_gtbox = -1 * np.ones((num_bboxes, 4)).astype(int)
    best_iou_for_gtbox = np.zeros(num_bboxes).astype(np.float32)
    best_x_for_gtbox = np.zeros((num_bboxes, 4)).astype(int)
    best_dx_for_gtbox = np.zeros((num_bboxes, 4)).astype(np.float32)

    #############################
    ##### Ground truth boxes ####
    #############################
    gta = helper.normalizeGTboxes(bboxes, scale=scale, roundoff=False)
    #    draw.drawHOI(image, gta[0,:], gta[0,:])

    #############################
    # Map ground truth 2 anchor #
    #############################
    for anchor_size_idx in range(len(anchor_sizes)):
        for anchor_ratio_idx in range(len(anchor_ratios)):
            #            w_anc = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0]
            #            h_anc = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1]

            size_ratio = cfg.rpn_stride**2
            w = np.round(np.sqrt(size_ratio / anchor_ratios[anchor_ratio_idx]))
            h = w * anchor_ratios[anchor_ratio_idx]

            w_anc = w * anchor_sizes[anchor_size_idx]
            h_anc = h * anchor_sizes[anchor_size_idx]

            for ix in range(output_width):
                xmin_anc = float(rpn_stride) * (ix + 0.5) - w_anc / 2
                xmax_anc = float(rpn_stride) * (ix + 0.5) + w_anc / 2
                if xmin_anc < 0 or xmax_anc > image_width - 1:
                    continue

                for jy in range(output_height):
                    ymin_anc = float(rpn_stride) * (jy + 0.5) - h_anc / 2
                    ymax_anc = float(rpn_stride) * (jy + 0.5) + h_anc / 2
                    if ymin_anc < 0 or ymax_anc > image_height - 1:
                        continue

                    bbox_type = 'neg'
                    best_iou_for_loc = 0.0
                    at = {
                        'xmin': xmin_anc,
                        'ymin': ymin_anc,
                        'xmax': xmax_anc,
                        'ymax': ymax_anc
                    }
                    #                print((rpn_stride*(ix+0.5), rpn_stride*(jy+0.5)), anchor_sizes[anchor_size_idx], anchor_ratios[anchor_ratio_idx])
                    for gtidx in range(num_bboxes):
                        gt = gta[gtidx]
                        curr_iou = utils.get_iou(gt, at)

                        if curr_iou > best_iou_for_gtbox[
                                gtidx] or curr_iou > rpn_max_overlap:
                            tx, ty, tw, th = helper.get_GT_deltas(gt, at)

#                            bxmin, bymin, bw, bh = helper.apply_regr([at['xmin'],at['ymin'],at['xmax']-at['xmin'],at['ymax']-at['ymin']], [tx,ty,tw,th])
#                            print(curr_iou)
#                            print('at',at['xmin'], at['ymin'], at['xmax'], at['ymax'])
#                            print('gt',gt['xmin'], gt['ymin'], gt['xmax'], gt['ymax'])
#                            print('bb',bxmin, bymin, bxmin + bw, bymin + bh)

                        # all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best
                        if curr_iou > best_iou_for_gtbox[gtidx]:
                            best_anchor_for_gtbox[gtidx] = [
                                jy, ix, anchor_ratio_idx, anchor_size_idx
                            ]
                            best_iou_for_gtbox[gtidx] = curr_iou
                            best_x_for_gtbox[gtidx, :] = [
                                at['xmin'], at['xmax'], at['ymin'], at['ymax']
                            ]
                            best_dx_for_gtbox[gtidx, :] = [tx, ty, tw, th]

                        # we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap)
                        if curr_iou > rpn_max_overlap:
                            #                            print(curr_iou, at)
                            #                        print(anchor_sizes[anchor_size_idx], anchor_ratios[anchor_ratio_idx])
                            bbox_type = 'pos'
                            num_anchors_for_gtbox[gtidx] += 1
                            # we update the regression layer target if this IOU is the best for the current (x,y) and anchor position
                            if curr_iou > best_iou_for_loc:
                                best_iou_for_loc = curr_iou
                                best_regr = (tx, ty, tw, th)

                        # if the IOU is >0.3 and <0.7, it is ambiguous and not included in the objective
                        if rpn_min_overlap < curr_iou < rpn_max_overlap:
                            # gray zone between neg and pos
                            if bbox_type != 'pos':
                                bbox_type = 'neutral'

                    # turn outputs on or off depending on IOUs
                    anc_idx = (anchor_ratio_idx +
                               len(anchor_ratios) * anchor_size_idx)
                    if bbox_type == 'neg':
                        y_is_box_valid[jy, ix, anc_idx] = 1
                        y_rpn_overlap[jy, ix, anc_idx] = 0
                    elif bbox_type == 'neutral':
                        y_is_box_valid[jy, ix, anc_idx] = 0
                        y_rpn_overlap[jy, ix, anc_idx] = 0
                    elif bbox_type == 'pos':
                        y_is_box_valid[jy, ix, anc_idx] = 1
                        y_rpn_overlap[jy, ix, anc_idx] = 1
                        y_rpn_regr[jy, ix,
                                   4 * anc_idx:4 * anc_idx + 4] = best_regr
                        y_rpn_ancs[jy, ix, 4 * anc_idx:4 * anc_idx + 4] = [
                            xmin_anc, ymin_anc, xmax_anc - xmin_anc,
                            ymax_anc - ymin_anc
                        ]

    #############################
    ##### Ensure GT Anchors #####
    #############################
    # we ensure that every bbox has at least one positive RPN region
    for idx in range(num_anchors_for_gtbox.shape[0]):
        #        print('anchors', idx)
        if num_anchors_for_gtbox[idx] == 0:
            # no box with an IOU greater than zero ...
            #            print('no anchors', idx, gta[idx])
            if best_anchor_for_gtbox[idx, 0] == -1:
                continue

            anc_idx = best_anchor_for_gtbox[
                idx, 2] + len(anchor_ratios) * best_anchor_for_gtbox[idx, 3]
            y_is_box_valid[best_anchor_for_gtbox[idx, 0],
                           best_anchor_for_gtbox[idx, 1], anc_idx] = 1
            y_rpn_overlap[best_anchor_for_gtbox[idx, 0],
                          best_anchor_for_gtbox[idx, 1], anc_idx] = 1
            y_rpn_regr[best_anchor_for_gtbox[idx,
                                             0], best_anchor_for_gtbox[idx, 1],
                       4 * anc_idx:4 * anc_idx + 4] = best_dx_for_gtbox[idx, :]

#    y_rpn_overlap = np.transpose(y_rpn_overlap, (2, 0, 1))
    y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)

    #    y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1))
    y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0)

    #    y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1))
    y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0)

    return [
        np.copy(y_rpn_overlap),
        np.copy(y_rpn_regr),
        np.copy(y_is_box_valid)
    ]
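To make the anchor geometry above concrete: with rpn_stride = 16, an anchor ratio of 2, and an anchor size of 4, the loop computes w = round(sqrt(256 / 2)) = 11 and h = w * 2 = 22, giving a 44 x 88 anchor (w_anc x h_anc) centered on each stride cell. A quick check with illustrative values (cfg supplies the real ones):

import numpy as np

rpn_stride, ratio, size = 16, 2, 4
w = np.round(np.sqrt(rpn_stride ** 2 / ratio))
h = w * ratio
print(w * size, h * size)  # -> 44.0 88.0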
Example #22
def model_fn(features, labels, mode, params):
    """Model function."""

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    query, len_q, ref, len_r = features
    batch_size = tf.shape(query)[0]

    # Video feature aggregation (Sec. 3.1).
    cell = tf.nn.rnn_cell.BasicLSTMCell(params.mem_dim)
    with tf.variable_scope('video_lstm', reuse=tf.AUTO_REUSE):
        out1, state1 = tf.nn.dynamic_rnn(cell, query, len_q, dtype=tf.float32)
        out2, state2 = tf.nn.dynamic_rnn(cell, ref, len_r, dtype=tf.float32)
    out1 = slim.dropout(out1,
                        keep_prob=params.keep_prob,
                        is_training=is_training)
    out2 = slim.dropout(out2,
                        keep_prob=params.keep_prob,
                        is_training=is_training)

    # Matching (Sec. 3.2).
    forward = tf.nn.rnn_cell.BasicLSTMCell(params.att_dim, name='forward')
    forward = MatchCellWrapper(forward, out1, len_q)
    backward = tf.nn.rnn_cell.BasicLSTMCell(params.att_dim, name='backward')
    backward = MatchCellWrapper(backward, out1, len_q, reuse=tf.AUTO_REUSE)
    with tf.variable_scope('att'):
        forward_out, forward_state = tf.nn.dynamic_rnn(forward,
                                                       out2,
                                                       len_r,
                                                       dtype=tf.float32)
        out2_reverse = tf.reverse_sequence(out2, len_r, 1, 0)
        backward_out, backward_state = tf.nn.dynamic_rnn(backward,
                                                         out2_reverse,
                                                         len_r,
                                                         dtype=tf.float32)
        backward_out = tf.reverse_sequence(backward_out, len_r, 1, 0)
    h = tf.concat([forward_out, backward_out], axis=2, name='concat_H')
    h = slim.dropout(h,
                     keep_prob=params.keep_prob + 0.2,
                     is_training=is_training)

    # Localization (Section 3.3).
    pointer = tf.nn.rnn_cell.BasicLSTMCell(params.att_dim)
    maxlen = tf.shape(h)[1]
    with tf.variable_scope('pointer'):
        point_out, _ = tf.nn.dynamic_rnn(pointer, h, len_r, dtype=tf.float32)
        logits = slim.fully_connected(point_out,
                                      4,
                                      activation_fn=None,
                                      scope='loc')

    # Make predictions.
    def map_body(x):
        logits = x[0]
        length = x[1]
        logits = logits[:length]
        prob = tf.nn.log_softmax(logits, axis=1)
        prob = tf.transpose(prob)

        initial_it = tf.constant(0, dtype=tf.int32)
        initial_idx_ta = tf.TensorArray(tf.int32,
                                        size=0,
                                        dynamic_size=True,
                                        element_shape=[])
        initial_val_ta = tf.TensorArray(tf.float32,
                                        size=0,
                                        dynamic_size=True,
                                        element_shape=[])

        def cond(it, *unused):
            # Limits the segment-length search to at most 64 frames.
            return it < tf.minimum(length, 64)

        def while_body(it, idx_ta, val_ta):
            # Eq. (11) is implemented here.
            total = tf.cond(tf.equal(it, 0),
                            lambda: tf.reduce_sum(prob[:2], axis=0),
                            lambda: prob[0, :-it] + prob[1, it:])

            def get_inside():
                score = tf.tile(prob[2, None, :], [it, 1])
                score = tf.reverse_sequence(score,
                                            tf.zeros([it], tf.int32) + length,
                                            1, 0)
                score = tf.reverse_sequence(score, length - tf.range(it), 1, 0)
                score = score[:, :-it]
                score = tf.reduce_mean(score, axis=0)
                return score

            ave = tf.cond(tf.equal(it, 0), lambda: prob[2], get_inside)
            total += ave
            idx = tf.argmax(total, output_type=tf.int32, name='max1')
            idx_ta = idx_ta.write(it, idx)
            val_ta = val_ta.write(it, total[idx])
            it += 1
            return it, idx_ta, val_ta

        res = tf.while_loop(cond, while_body,
                            [initial_it, initial_idx_ta, initial_val_ta])
        final_idx = res[1].stack()
        final_val = res[2].stack()
        idx = tf.argmax(final_val, output_type=tf.int32)
        pred = tf.stack([final_idx[idx], final_idx[idx] + idx + 1])
        return pred

    predictions = tf.map_fn(map_body, [logits, len_r], tf.int32)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
        )

    # Loss computation.
    idx = tf.stack([tf.range(batch_size), labels[:, 0]], axis=1)
    label_st = tf.scatter_nd(idx, tf.ones(batch_size), [batch_size, maxlen])
    idx = tf.stack([tf.range(batch_size), labels[:, 1] - 1], axis=1)
    label_en = tf.scatter_nd(idx, tf.ones(batch_size), [batch_size, maxlen])
    inside_t = tf.sequence_mask(labels[:, 1] - labels[:, 0], maxlen)
    inside_t = tf.reverse_sequence(inside_t, labels[:, 1], 1, 0)
    outside = tf.logical_not(inside_t)
    inside_t = tf.to_float(inside_t)
    outside = tf.to_float(outside)
    label = tf.stack([label_st, label_en, inside_t, outside], axis=2)

    # Eq. (10)
    heavy = tf.reduce_sum(label[:, :, :2], axis=-1) > 0.9
    heavy = tf.to_float(heavy) * 9 + 1
    label = label / tf.reduce_sum(label, axis=2, keepdims=True)
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
    loss *= heavy
    mask = tf.sequence_mask(len_r, maxlen)
    loss = tf.boolean_mask(loss, mask)
    loss = tf.reduce_mean(loss)
    model_params = tf.trainable_variables()
    weights = [i for i in model_params if 'bias' not in i.name]
    loss += params.weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in weights])

    # Optimization.
    gradients = tf.gradients(loss, model_params)
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(
        gradients, params.max_gradient_norm)
    tf.summary.scalar('grad_norm', gradient_norm)
    tf.summary.scalar('clipped_gradient', tf.global_norm(clipped_gradients))

    # boundaries = [200, 400, 600]
    # staged_lr = [params.learning_rate * x for x in [1, 0.1, 0.01, 0.002]]
    # learning_rate = tf.train.piecewise_constant(tf.train.get_global_step(),
    #                                             boundaries, staged_lr)
    # tf.summary.scalar('learning_rate', learning_rate)
    tensors_to_log = {
        'loss': loss,
        'step': tf.train.get_global_step(),
        'len_q': tf.shape(features[0])[1],
        'len_r': tf.shape(features[2])[1]
    }
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=10)
    train_hooks = [logging_hook]
    optimizer = tf.train.AdamOptimizer(params.learning_rate)

    if is_training:
        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, model_params), tf.train.get_global_step())
    else:
        train_op = None

    # Evaluation.
    iou = get_iou(predictions, labels)
    metrics = get_eval_metric(iou)

    for variable in tf.trainable_variables():
        tf.summary.histogram(variable.op.name, variable)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=train_hooks,
                                      eval_metric_ops=metrics)
Example #23
def run_training_loop(model, optimizer, scheduler, device, train_loader,
                      test_loader, criterion_classification,
                      criterion_localization, epochs):
    logger.info('Start training')
    for epoch in range(epochs):
        logger.debug(f'Epoch {epoch + 1}')

        model.train()
        running_loss_ce = 0.0
        running_loss_mse = 0.0
        for i, data in enumerate(train_loader):
            images, labels, bboxes_gt = data
            bboxes_gt = torch.stack(bboxes_gt, dim=1)

            images = images.to(device)
            labels = labels.to(device)
            bboxes_gt = bboxes_gt.to(device)

            optimizer.zero_grad()

            logits, bboxes = model(images)
            loss_ce = criterion_classification(logits, labels)
            loss_mse = criterion_localization(bboxes, bboxes_gt.float()) * 50
            loss = loss_ce + loss_mse
            loss.backward()
            optimizer.step()

            running_loss_ce += loss_ce.item()
            running_loss_mse += loss_mse.item()
            print_every = 5
            if (i + 1) % print_every == 0:
                running_loss_ce = running_loss_ce / print_every
                running_loss_mse = running_loss_mse / print_every
                logger.debug(f'[{epoch + 1}, {i + 1}] '
                             f'loss_ce: {running_loss_ce:.3f}, '
                             f'loss_mse {running_loss_mse:.3f}')
                running_loss_ce = 0
                running_loss_mse = 0

        scheduler.step()
        correct = 0
        total = 0
        iou = 0
        model.eval()
        with torch.no_grad():
            for data in test_loader:
                test_images, test_labels, test_bboxes = data
                test_images = test_images.to(device)

                outputs = model(test_images)
                _, predicted = torch.max(outputs[0].cpu().data, 1)
                total += test_labels.size(0)
                correct += (predicted == test_labels).sum().item()
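                # NOTE: the IoU bookkeeping below assumes test_loader batches
                # one image at a time (.item() requires single-element tensors).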
                bbox_gt = [a.item() for a in test_bboxes]

                bbox = outputs[1].cpu().data.numpy().flatten()
                iou += utils.get_iou(bbox_gt, bbox)

        iou = iou / total
        accuracy = 100 * correct / total
        logger.info(f'Test -- Accuracy: {accuracy:.4f}, IoU: {iou:.4f}')
Example #24
0
def gen_crop(img, boxes, landmark=None, display=False):

    height, width, _ = img.shape

    # Counts of each sample type: pos, part, neg, landmark
    crop_nums = [0] * 4

    # First generate some negative crops at random locations
    if landmark is None:
        crop_nums[2] += yield from gen_neg_img(img, boxes)        

    # Then generate each sample type around every ground-truth face box
    for box in boxes:
        x1, y1, w, h = box

        # Skip small or partially out-of-frame faces
        if min(w, h) < 20 or x1 < 0 or y1 < 0:
            continue

        if landmark is not None:
            # Emit the ground-truth face crop as a landmark sample (label -2),
            # with landmarks shifted to the box origin and normalized by (w, h).
            yield img[y1: y1+h, x1: x1+w], -2, (*[0.] * 4, (landmark - box[:2]) / box[2:])

        # Sample jittered crops around this box
        for i in range(15):
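            # Draw a square crop with side in [0.8*min(w,h), 1.2*max(w,h)],
            # jittered by up to 20% of the box size around its center.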

            size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.2 * max(w, h)))

            delta_x = npr.randint(-w * 0.2, w * 0.2)
            delta_y = npr.randint(-h * 0.2, h * 0.2)


            nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
            ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))

            nx2 = nx1 + size
            ny2 = ny1 + size
            if nx2 > width or ny2 > height:
                continue

            crop_box = np.array([nx1, ny1, size, size])

            # crop
            cropped_im = img[ny1: ny2, nx1: nx2]

            box_ = box.reshape(1, -1)
            iou = get_iou(crop_box, box_)

            if iou < 0.4:
                continue
            
            # Offsets of the ground-truth box relative to the crop
            offset_x1 = (x1 - nx1) / size
            offset_y1 = (y1 - ny1) / size

            offset_x2 = (x1+w-nx1-size) / size
            offset_y2 = (y1+h-ny1-size) / size

            # IoU >= 0.65 counts as a positive sample, 0.4 <= IoU < 0.65 as a part sample.
            if iou >= 0.65:
                if landmark is None:
                    yield cropped_im, 1, (offset_x1, offset_y1, offset_x2, offset_y2)
                    crop_nums[0] += 1
                else:
                    marks = (landmark - crop_box[:2]) / crop_box[3:]
                    yield cropped_im, -2, (offset_x1, offset_y1, offset_x2, offset_y2, marks)
                    crop_nums[3] += 1

            elif landmark is None and iou >= 0.4:
                yield cropped_im, -1, (offset_x1, offset_y1, offset_x2, offset_y2)
                crop_nums[1] += 1

    if display:
        print("pos: %d part: %d neg: %d landmark: %d" % tuple(crop_nums))
Example #25
0
def get_VDG(box1, box2):
    '''
    Visual Dependency Grammar
    input: 2 bbox
    output: spatial relation between 2 boxes
    size of image 785 x 1024 (width = 1024, height = 785)
    '''
    box1_overlay = utils.percent_overlap(box1, box2)
    box2_overlay = utils.percent_overlap(box2, box1)
    iou = utils.get_iou(box1, box2)  # symmetric IoU; the rules below use directional overlap

    if box2_overlay > 0.9:
        # box2 is in box1 --> box1 - '13':'covering' - box2
        predicate = 'covering'
        encode = '13'
        return (predicate, encode)

    if box1_overlay > 0.5:
        # box1 - '31':'on' - box2
        predicate = 'on'
        encode = '31'
        return (predicate, encode)

    centroid1 = np.asarray([(box1[0] + box1[2]) / 2, (box1[1] + box1[3]) / 2])
    centroid2 = np.asarray([(box2[0] + box2[2]) / 2, (box2[1] + box2[3]) / 2])
    vec_centroid = centroid2 - centroid1
    vec_anchor = np.asarray([1, 0])

    unit_vector_1 = vec_centroid / np.linalg.norm(vec_centroid)
    unit_vector_2 = vec_anchor / np.linalg.norm(vec_anchor)
    dot_product = np.dot(unit_vector_1, unit_vector_2)  # cos(alpha)
    thres_cos_1 = np.cos(45 * np.pi / 180)
    thres_cos_2 = np.cos(135 * np.pi / 180)
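    # A centroid-to-centroid direction within 45 degrees of horizontal means
    # the boxes sit beside each other; otherwise one is above the other.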

    if dot_product > thres_cos_1 or dot_product < thres_cos_2:  # beside or opposite
        if np.abs(
                centroid1[0] - centroid2[0]
        ) / 1024 > 0.7:  # TODO: also require the two objects to differ in size (not implemented); 1024 = image width
            predicate = 'across'  # '2'
            encode = '2'
            return (predicate, encode)
        else:
            predicate = 'near'  # '29'
            encode = '29'
            return (predicate, encode)

    if dot_product <= thres_cos_1 and dot_product >= thres_cos_2:  # below or above
        if box1_overlay < 0.1 and box2_overlay < 0.1:
            if centroid1[1] > centroid2[1] and box1[1] > centroid2[1]:
                predicate = 'under'  # '43'
                encode = '43'
                return (predicate, encode)
            if centroid1[1] <= centroid2[1] and box1[3] <= centroid2[1]:
                predicate = 'above'  # '1'
                encode = '1'
                return (predicate, encode)
            else:
                predicate = 'near'  # '29'
                encode = '29'
                return (predicate, encode)
        else:
            predicate = 'near'  # '29'
            encode = '29'
            return (predicate, encode)
Example #26
0
def combineSimilarBBs(imagesMeta, labels, minIoU=0.4):
    new_imagesMeta = {}
    for imageID, imageMeta in imagesMeta.items():
        #        print('ID', imageID)
        data = {'prsBB': [], 'objBB': [], 'labels': []}
        nb_rels = 0
        for relID, rel in imageMeta['rels'].items():
            data['prsBB'].append(rel['prsBB'])
            data['objBB'].append(rel['objBB'])
            data['labels'].append(rel['label'])
            nb_rels += 1

        for key in ['prsBB', 'objBB']:
            bbData = data[key]
            similars = np.arange(nb_rels, dtype=int)  # each rel starts as its own cluster
            already_taken = []
            disabled = []
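            # Greedily merge boxes whose weighted IoU exceeds minIoU and
            # repeat until a full pass produces no new merge.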
            while True:
                should_I_stay_or_should_I_go = 'go'
                for fstID, fstBB in enumerate(bbData[0:-1]):
                    if fstID in already_taken:
                        continue
                    for secID, secBB in enumerate(bbData[fstID + 1:]):
                        secID += fstID + 1
                        #                        print(data['labels'])
                        #                        print(data['labels'][fstID])
                        fstNames = labels[data['labels'][fstID]]
                        secNames = labels[data['labels'][secID]]
                        if key == 'objBB' and \
                           (fstNames['pred'] == secNames['pred'] or \
                            fstNames['obj'] != secNames['obj']):
                            #                            print(fstNames, secNames)
                            continue
                        if secID in disabled:
                            continue


                        # print('bbs', key, fstBB, secBB)
                        # print(utils.get_iou(fstBB, secBB, weight=True))
                        if utils.get_iou(fstBB, secBB, weight=True) > minIoU:
                            if similars[secID] != secID:

                                similars[similars == fstID] = similars[secID]
                                already_taken.append(fstID)
                            else:
                                similars[similars == secID] = fstID
                                already_taken.append(secID)
                            should_I_stay_or_should_I_go = 'stay'
                if should_I_stay_or_should_I_go == 'go':
                    # converged
                    break
                # Collapse each cluster into its representative slot by
                # averaging the member boxes.
                new_bbData = [{} for i in range(len(bbData))]
                tmp_conn = []
                for idx, sim in enumerate(similars):
                    bb = bbData[idx]
                    if len(bb) == 0:
                        continue  # slot emptied by an earlier merge pass
                    if sim in tmp_conn:
                        new_bbData[sim] = utils.meanBB(new_bbData[sim], bb)
                    else:
                        new_bbData[sim] = bb
                        tmp_conn.append(sim)
                bbData = new_bbData
                disabled = already_taken
            data[key] = bbData
            data[key + 'sims'] = similars

        tmp_rels = {}
        for relID in range(nb_rels):
            prsIdx = data['prsBBsims'][relID]
            objIdx = data['objBBsims'][relID]
            label = data['labels'][relID]
            if imageID == 'HICO_test2015_00000007.jpg':
                print(prsIdx, objIdx, label)
            if prsIdx not in tmp_rels:
                tmp_rels[prsIdx] = {}
            if objIdx not in tmp_rels[prsIdx]:
                tmp_rels[prsIdx][objIdx] = []
            tmp_rels[prsIdx][objIdx].append(label)

        rels = {}
        relID = 0
        for prsIdx, sub_rels in tmp_rels.items():
            for objIdx, insLabels in sub_rels.items():
                prsBB = data['prsBB'][prsIdx]
                objBB = data['objBB'][objIdx]
                rel = {'prsBB': prsBB, 'objBB': objBB, 'labels': insLabels}
                rels[relID] = rel
                relID += 1
        new_imagesMeta[imageID] = {
            'imageName': imageMeta['imageName'],
            'rels': rels
        }
    return new_imagesMeta
Example #27
0
    def __getitem__(self, idx):

        out_dict = {}
        out_dict['args'] = self.args

        datum = self.data[idx]
        # uid = datum['uid']
        # out_dict['uid'] = uid

        # test = 'test' in datum['annot_id']
        # out_dict['is_test'] = test

        ###### Image ######
        if self.args.use_vision:
            img_id = datum['image_id']
            out_dict['img_id'] = img_id

            # img_path = coco_img_dir.joinpath(datum['img_fn'])
            # assert img_path.exists()
            # out_dict['img_path'] = img_path

            # source = self.img_ids_to_source[img_id]
            source = self.split

            f = self.source_to_h5[source]

            if isinstance(f, Path):
                f = h5py.File(f, 'r')
                self.source_to_h5[source] = f

            img_h = f[f'{img_id}/img_h'][()]
            img_w = f[f'{img_id}/img_w'][()]

            # pred_boxes = f[f'{img_id}/boxes']

            boxes = f[f'{img_id}/boxes'][:self.args.n_boxes]

            # shuffle box order
            if self.args.shuffle_boxes and self.mode == 'train':
                box_indices = np.arange(len(boxes))
                np.random.shuffle(box_indices)

                boxes = boxes[box_indices]

            n_boxes = len(boxes)

            out_dict['n_boxes'] = n_boxes

            ref_box = datum['refBox']

            ref_box = xywh_to_xyxy(np.array([ref_box]))

            ious = get_iou(torch.tensor(boxes, dtype=torch.float),
                           torch.tensor(ref_box, dtype=torch.float))

            threshold = 0.5
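            # Boxes overlapping the referred box by at least the threshold are
            # kept; one target index is then sampled in proportion to its IoU.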
            scores = ious.detach().numpy().flatten()
            scores[scores < threshold] = 0
            scores = scores.astype(np.float64)

            exists_target = scores.sum() > 0

            if exists_target:
                correct_indices = np.nonzero(scores)[0].tolist()
                prob = scores / scores.sum()

                choice = np.random.multinomial(1, prob).argmax()
            else:
                correct_indices = []
                choice = -100

            # Normalize the boxes (to 0 ~ 1)
            boxes[:, (0, 2)] /= img_w
            boxes[:, (1, 3)] /= img_h

            np.testing.assert_array_less(boxes, 1 + 1e-5)
            # np.testing.assert_array_less(boxes, 1+5e-2)
            np.testing.assert_array_less(-boxes, 0 + 1e-5)
            boxes = torch.from_numpy(boxes)

            # assert boxes.size() == (36, 4), (boxes.size(),
            #                                  datum['img_id'], gt_boxes.shape, pred_boxes.shape)

            boxes.clamp_(min=0.0, max=1.0)

            out_dict['boxes'] = boxes

            feats = f[f'{img_id}/features'][:self.args.n_boxes]

            if self.args.shuffle_boxes and self.mode == 'train':
                feats = feats[box_indices]

            feats = torch.from_numpy(feats)

            out_dict['vis_feats'] = feats

        ###### Text ######

        sent = datum['caption']

        # prefix = "refer expressions:"
        prefix = "visual grounding:"
        # prefix = "grounding:"
        input_text = f'{prefix} {sent}'

        if exists_target:
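            # Two target formats: raw box indices for a visual-pointer head,
            # or a <vis_extra_id_*> sentinel token for text generation.
            # (exists_target is set in the vision branch, so use_vision is
            # assumed to be enabled here.)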
            if self.args.vis_pointer:
                all_target_ids = correct_indices
                target_text = ''
            else:
                target_text = f'<vis_extra_id_{choice}>'
                all_target_ids = self.tokenizer.convert_tokens_to_ids(
                    [f'<vis_extra_id_{idx}>' for idx in correct_indices])

        else:
            if self.args.vis_pointer:
                all_target_ids = []
                target_text = ''
            else:
                target_text = ''
                all_target_ids = []

        out_dict['exists_target'] = exists_target
        out_dict['iou'] = ious
        out_dict['target'] = choice
        out_dict['all_targets'] = correct_indices
        out_dict['all_target_ids'] = all_target_ids

        input_ids = self.tokenizer.encode(input_text,
                                          max_length=self.args.max_text_length,
                                          truncation=True)
        target_ids = self.tokenizer.encode(
            target_text, max_length=self.args.max_text_length, truncation=True)

        out_dict['input_ids'] = torch.LongTensor(input_ids)
        out_dict['input_length'] = len(input_ids)
        out_dict['target_ids'] = torch.LongTensor(target_ids)
        out_dict['target_length'] = len(target_ids)

        out_dict['input_text'] = input_text
        out_dict['target_text'] = target_text

        return out_dict