Exemplo n.º 1
0
def detect(net,
           target_data,
           im_data,
           im_info,
           score_thresh=.1,
           features_given=True,
           max_dets_per_target=5):
    """
    Detect single target object in a single scene image.

    Runs the network once, keeps the boxes whose foreground score exceeds
    score_thresh, applies non maximum suppression and finally keeps only
    the highest scoring detections.

    Input Parameters:
        net: (TDID) the network
        target_data: (torch Variable) target images
        im_data: (torch Variable) scene_image
        im_info: (tuple) (height,width,channels) of im_data

        score_thresh(optional): (float) a box is kept only if its
                                foreground score is strictly greater
                                than this value. Default: .1
        features_given(optional): (bool) if true, target_data and im_data
                                  are feature maps from net.features,
                                  not images. Default: True
        max_dets_per_target(optional): (int) number of top scoring
                                       detections to keep after NMS (ties
                                       at the cut-off score are all kept).
                                       Values <= 0 disable the limit.
                                       Default: 5

    Returns:
        fg_dets (ndarray): float32 array with one row per kept detection:
                           the box columns followed by the foreground
                           score (K x 5 when the boxes are N x 4)
    """

    cls_prob, rois = net(target_data,
                         im_data,
                         im_info,
                         features_given=features_given)
    scores = cls_prob.data.cpu().numpy()[0, :, :]
    # Prepend one zero per box so that column 1 holds the network's first
    # score column. The row count has to come from shape[0]: scores.size
    # counts every element and only equals the number of boxes when there
    # is exactly one score column.
    zs = np.zeros((scores.shape[0], 1))
    scores = np.concatenate((zs, scores), 1)
    boxes = rois.data.cpu().numpy()[0, :, :]

    # get scores for foreground, non maximum suppression
    inds = np.where(scores[:, 1] > score_thresh)[0]
    fg_scores = scores[inds, 1]
    fg_boxes = boxes[inds, :]
    fg_dets = np.hstack((fg_boxes, fg_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
    keep = nms(fg_dets, cfg.TEST_NMS_OVERLAP_THRESH)
    fg_dets = fg_dets[keep, :]

    # keep only the best scoring detections
    if max_dets_per_target > 0 and len(fg_dets) > max_dets_per_target:
        image_thresh = np.sort(fg_dets[:, -1])[-max_dets_per_target]
        keep = np.where(fg_dets[:, -1] >= image_thresh)[0]
        fg_dets = fg_dets[keep, :]

    return fg_dets
def test_net(model_name,
             net,
             dataloader,
             target_images,
             chosen_ids,
             cfg,
             max_dets_per_target=5,
             score_thresh=0.1,
             output_dir=None):
    """
    Run a TDID network over a test set and gather its detections.

    Input Parameters:
        model_name: (string) name of model, used as the results file name
        net: (TDID) the network
        dataloader:  (torch DataLoader) dataloader for test set
        target_images: (dict) holds paths to target images
        chosen_ids: (list) list of object ids to test on
        cfg: (Config) config file

        max_dets_per_target (optional): (int) maximum number of detections
                                        outputted for a single target/scene
                                        image pair. Values <= 0 disable the
                                        limit. Default: 5.
        score_thresh (optional): (float) minimum score a box must exceed to
                                 be outputted. Default: .1
        output_dir (optional): (str) full path of directory to save results
                               in (as <model_name>.json). If None, nothing
                               will be saved. Default: None.

    Returns:
        results (list): one dict per detection with the keys 'image_id',
                        'category_id', 'bbox' ([xmin, ymin, width, height])
                        and 'score'
    """
    results = []
    num_images = len(dataloader)
    id_to_name = cfg.ID_TO_NAME
    timers = {'im_detect': Timer(), 'misc': Timer()}

    save_to_disk = output_dir is not None
    if save_to_disk:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        det_file = os.path.join(output_dir, model_name + '.json')

    # Prepare every target once: raw images when the network is fed one
    # target/scene pair at a time, precomputed features otherwise.
    target_features_dict = {}
    target_data_dict = {}
    for t_id in chosen_ids:
        target_name = id_to_name[t_id]
        if target_name == 'background':
            continue

        per_type_paths = target_images[target_name]
        views = []
        for t_type, _ in enumerate(per_type_paths):
            candidates = per_type_paths[t_type]
            # one randomly chosen image per target type
            pick = np.random.choice(np.arange(len(candidates)))
            views.append(normalize_image(cv2.imread(candidates[pick]), cfg))

        target_var = np_to_variable(match_and_concat_images_list(views),
                                    is_cuda=True)
        target_var = target_var.permute(0, 3, 1, 2)
        if cfg.TEST_ONE_AT_A_TIME:
            target_data_dict[target_name] = target_var
        else:
            target_features_dict[target_name] = net.features(target_var)

    for img_idx, batch in enumerate(dataloader):
        scene = batch[0]
        # the shape is recorded before the optional resize below
        im_info = scene.shape[:]
        if cfg.TEST_RESIZE_IMG_FACTOR > 0:
            scene = cv2.resize(scene, (0, 0),
                               fx=cfg.TEST_RESIZE_IMG_FACTOR,
                               fy=cfg.TEST_RESIZE_IMG_FACTOR)
        scene = normalize_image(scene, cfg)
        scene = np_to_variable(scene, is_cuda=True)
        scene = scene.unsqueeze(0).permute(0, 3, 1, 2)

        # The image id is the image name without its last four characters
        # (presumably the file extension).
        img_name = batch[1][1]
        img_id = int(img_name[:-4])

        # scene features are shared by all targets of this image
        if not cfg.TEST_ONE_AT_A_TIME:
            img_features = net.features(scene)

        for t_id in chosen_ids:
            target_name = id_to_name[t_id]
            if target_name == 'background':
                continue

            if cfg.TEST_ONE_AT_A_TIME:
                target_input = target_data_dict[target_name]
                scene_input = scene
                uses_features = False
            else:
                target_input = target_features_dict[target_name]
                scene_input = img_features
                uses_features = True

            timers['im_detect'].tic()
            scores, boxes = im_detect(net,
                                      target_input,
                                      scene_input,
                                      im_info,
                                      features_given=uses_features)
            detect_time = timers['im_detect'].toc(average=False)
            timers['misc'].tic()

            # undo the test-time resize so boxes refer to the input image
            if cfg.TEST_RESIZE_IMG_FACTOR > 0:
                boxes *= (1.0 / cfg.TEST_RESIZE_IMG_FACTOR)

            # foreground boxes above the threshold, then NMS
            inds = np.where(scores[:, 1] > score_thresh)[0]
            fg_dets = np.hstack(
                (boxes[inds, :], scores[inds, 1][:, np.newaxis])
            ).astype(np.float32, copy=False)
            fg_dets = fg_dets[nms(fg_dets, cfg.TEST_NMS_OVERLAP_THRESH), :]

            # keep only the best scoring detections for this target
            if max_dets_per_target > 0:
                det_scores = fg_dets[:, -1]
                if len(det_scores) > max_dets_per_target:
                    cutoff = np.sort(det_scores)[-max_dets_per_target]
                    fg_dets = fg_dets[np.where(det_scores >= cutoff)[0], :]
            nms_time = timers['misc'].toc(average=False)

            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                img_idx + 1, num_images, detect_time, nms_time))

            # Insert the class id as column 4, moving the score to column 5.
            for box in np.insert(fg_dets, 4, t_id, axis=1):
                results.append({
                    'image_id': img_id,
                    'category_id': int(box[4]),
                    'bbox': [
                        int(box[0]),
                        int(box[1]),
                        int(box[2] - box[0] + 1),
                        int(box[3] - box[1] + 1)
                    ],
                    'score': float(box[5])
                })

    if save_to_disk:
        with open(det_file, 'w') as f:
            json.dump(results, f)
    return results
Exemplo n.º 3
0
                im_data = im_data.permute(0, 3, 1, 2).contiguous()
                target_data = np_to_variable(target_data, is_cuda=True)
                target_data = target_data.permute(0, 3, 1, 2).contiguous()

                scores, boxes = im_detect(net,
                                          target_data,
                                          im_data,
                                          im_info,
                                          features_given=False)

                inds = np.where(scores[:, 1] > 0.1)[0]
                fg_scores = scores[inds, 1]
                fg_boxes = boxes[inds, :]
                fg_dets = np.hstack((fg_boxes, fg_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                keep = nms(fg_dets, 0.7)
                fg_dets = fg_dets[keep, :]

                gt_boxes[2] += gt_boxes[0]
                gt_boxes[3] += gt_boxes[1]

                for f in range(len(fg_dets)):

                    detection = [
                        fg_dets[f][4], fg_dets[f][0], fg_dets[f][1],
                        fg_dets[f][2], fg_dets[f][3]
                    ]

                    writeForPASCALVOC("Object-Detection-Metrics/detections/",
                                      fileName, str(category_id), detection)
                    writeForPASCALVOC("Object-Detection-Metrics/groundtruths/",
Exemplo n.º 4
0
def eval_images(net):
    """
    Evaluate `net` on randomly sampled scene images and write PASCAL VOC
    style text files.

    Each iteration picks a random ".jpg" found under the backgrounds
    directory, looks its annotation up in the ground-truth JSON text, runs
    im_detect against two randomly chosen target views of the annotated
    category and writes the first five detections (together with the
    ground-truth box) through writeForPASCALVOC. Only samples whose
    difficulty bucket is listed in `difficulties` are evaluated. The loop
    ends after `numToEval` counted images, or once every tracked bucket
    holds at least `perCat - 1` samples.

    Input Parameters:
        net: the network handed to im_detect

    Returns:
        None. Progress and a final summary are printed.
    """
    print("Start eval")

    pathToBackgrounds = '/content/drive/My Drive/ActiveVisionDataset/'
    pathToGT = '/content/drive/My Drive/Data/GT/'
    pathToTargets = '/content/drive/My Drive/Data/AVD_and_BigBIRD_targets_v1/'

    valid_files = find_files(pathToBackgrounds, ".jpg")
    print("Files read")

    numOfImages = 0
    numToEval = 250
    numCorrect = 0
    fail = 0
    corr = 0

    countDict = {}
    difficulties = [3]
    # NOTE(review): perCat is not assigned in this function (a local
    # "perCat = 50" used to be here but is disabled), so it has to exist at
    # module scope. If it does not, the lookups below raise NameError, which
    # the except handler only prints, and the loop never finishes.

    # The ground-truth text does not depend on the sampled image, so it is
    # read once instead of on every loop iteration.
    json_chunks = []
    for json_file in find_files(pathToGT, ".json"):
        with open(json_file, 'r') as fh:
            json_chunks.append(fh.read())
    json_data = "".join(json_chunks)

    while True:

        chosen_image_path = random.choice(valid_files)
        chosen_image = chosen_image_path.split("/")[-1]

        # images without an annotation are skipped
        if chosen_image not in json_data:
            continue

        try:
            # The annotation is extracted with plain string splitting: first
            # the id that follows the image name, then the record that
            # starts at the matching "image_id".
            image_id = json_data.split(chosen_image)[1].split("}")[0]
            image_id = image_id.split("id\": ")[1].split(",")[0]

            data = json_data.split("\"image_id\": " +
                                   image_id)[1].split("}")[0]

            bounding_boxes_data = data.split("\"bbox\": [")[1].split("]")[0]
            bb_data = [int(value) for value in bounding_boxes_data.split(",")]

            category_id = int(
                data.split("\"category_id\": ")[1].split(",")[0])
            print(category_id)

            # map the numeric category to the target name
            target_name = ""
            with open(pathToBackgrounds + "instance_id_map.txt", 'r') as fh:
                for line in fh:
                    linesplit = line.rstrip().split(" ")
                    if int(linesplit[1]) == category_id:
                        target_name = linesplit[0]
                        break

            # collect the two kinds of target views of that object
            target_image_paths_1 = []
            target_image_paths_2 = []
            for target_path in find_files(pathToTargets, ".jpg"):
                if target_name in target_path:
                    if "target_0" in target_path:
                        target_image_paths_1.append(target_path)
                    elif "target_1" in target_path:
                        target_image_paths_2.append(target_path)

            pre_load_image = cv2.imread(chosen_image_path)
            pre_load_target_1 = cv2.imread(
                random.choice(target_image_paths_1))
            pre_load_target_2 = cv2.imread(
                random.choice(target_image_paths_2))

            im_data = cv2.resize(pre_load_image, (1920, 1080),
                                 interpolation=cv2.INTER_AREA)

            # Difficulty bucket from bbox entries 2 and 3 (presumably the
            # box width and height; verify against the GT files). Larger
            # boxes get a lower difficulty.
            box_w, box_h = bb_data[2], bb_data[3]
            if box_w > 300 and box_h > 100:
                difficulty = 1
            elif box_w > 200 and box_h > 75:
                difficulty = 2
            elif box_w > 100 and box_h > 50:
                difficulty = 3
            elif box_w > 50 and box_h > 30:
                difficulty = 4
            else:
                difficulty = 5
            if difficulty not in difficulties:
                continue

            countDict.setdefault(difficulty, 0)
            if countDict[difficulty] >= perCat:
                continue
            # dict.values() works on Python 2 and 3; iteritems() does not
            # exist on Python 3 dicts.
            if all(value >= (perCat - 1) for value in countDict.values()):
                print("Completed")
                break
            countDict[difficulty] += 1

            target1 = cv2.resize(pre_load_target_1, (80, 80),
                                 interpolation=cv2.INTER_AREA)
            target2 = cv2.resize(pre_load_target_2, (80, 80),
                                 interpolation=cv2.INTER_AREA)

            batch_im_data = [normalize_image(im_data, cfg)]
            batch_target_data = [
                normalize_image(target1, cfg),
                normalize_image(target2, cfg)
            ]

            # prep data for input to network
            target_data = match_and_concat_images_list(
                batch_target_data, min_size=cfg.MIN_TARGET_SIZE)
            im_data = match_and_concat_images_list(batch_im_data)
            im_info = im_data.shape[1:]
            im_data = np_to_variable(im_data, is_cuda=True)
            im_data = im_data.permute(0, 3, 1, 2).contiguous()
            target_data = np_to_variable(target_data, is_cuda=True)
            target_data = target_data.permute(0, 3, 1, 2).contiguous()

            scores, boxes = im_detect(net,
                                      target_data,
                                      im_data,
                                      im_info,
                                      features_given=False)

            inds = np.where(scores[:, 1] > 0.1)[0]
            fg_scores = scores[inds, 1]
            fg_boxes = boxes[inds, :]
            fg_dets = np.hstack((fg_boxes, fg_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(fg_dets, 0.7)
            fg_dets = fg_dets[keep, :]

            if len(fg_dets) < 5:
                # Too few detections: count the image as a failure and move
                # on instead of indexing past the end of fg_dets below.
                numOfImages += 1
                fail += 1
                continue

            # turn entries 2 and 3 into the opposite box corner
            bb_data[2] += bb_data[0]
            bb_data[3] += bb_data[1]

            # NOTE(review): fileName is not assigned in this function; it
            # has to exist at module scope.
            # NOTE(review): the ground-truth box is written once per
            # detection (five times per image); confirm writeForPASCALVOC
            # tolerates the repeats.
            for det in fg_dets[:5]:
                # score first, then the four box coordinates
                detection = [det[4], det[0], det[1], det[2], det[3]]

                writeForPASCALVOC("Object-Detection-Metrics/detections/",
                                  fileName, str(category_id), detection)
                writeForPASCALVOC("Object-Detection-Metrics/groundtruths/",
                                  fileName, str(category_id), bb_data)

            numOfImages += 1
            print("Number evaluated: " + str(numOfImages))

            if numOfImages >= numToEval:
                break

        except Exception as e:
            # Best effort: a sample that cannot be processed is reported and
            # a new one is drawn.
            print(e)

    print("numCorrect:", numCorrect)
    print("numOfImages:", numOfImages)
    print("corr:", corr)
    print("total:", corr + fail)
    # the "Completed" exit can be reached before any image was counted
    final_score = numCorrect / numOfImages if numOfImages else 0.0
    print("Final score: " + str(final_score))
Exemplo n.º 5
0
def eval_synth_images(net):
    """
    Run `net` on synthetic scene images and write PASCAL VOC style files.

    Each iteration loads one synthetic sample through load_synth_image,
    runs im_detect on it, thresholds the foreground scores at 0.1, applies
    NMS with an overlap of 0.7 and writes the first five detections plus
    the ground-truth box through writeForPASCALVOC. Samples with fewer
    than five detections are skipped. The loop ends after 250 written
    samples.

    Input Parameters:
        net: the network handed to im_detect

    Returns:
        None. Debug information and progress are printed.
    """
    numOfImages = 0
    numToEval = 250

    while True:

        try:
            im_data, gt_boxes, target1, target2, image_path = \
                load_synth_image()

            # Entries 2 and 3 become the opposite corner (presumably the
            # box arrives as x, y, width, height; verify against
            # load_synth_image).
            gt_boxes[2] += gt_boxes[0]
            gt_boxes[3] += gt_boxes[1]
            gt = [gt_boxes[k] for k in range(4)]

            scene_batch = [normalize_image(im_data, cfg)]
            target_batch = [
                normalize_image(target1, cfg),
                normalize_image(target2, cfg)
            ]

            # prep data for input to network
            target_data = match_and_concat_images_list(
                target_batch, min_size=cfg.MIN_TARGET_SIZE)
            im_data = match_and_concat_images_list(scene_batch)
            gt_boxes = np.asarray(list(gt_boxes))
            im_info = im_data.shape[1:]
            im_data = np_to_variable(im_data, is_cuda=True)
            im_data = im_data.permute(0, 3, 1, 2).contiguous()
            target_data = np_to_variable(target_data, is_cuda=True)
            target_data = target_data.permute(0, 3, 1, 2).contiguous()

            scores, boxes = im_detect(net,
                                      target_data,
                                      im_data,
                                      im_info,
                                      features_given=False)

            # Debug output for a few fixed proposals. Indexing a proposal
            # that does not exist raises, which sends the sample to the
            # except handler below.
            for label, idx in (("box 0", 0), ("box 2", 2), ("box 50", 50)):
                print(label)
                print(boxes[idx])
                print(scores[idx])

            print(len(boxes))
            above_thresh = np.where(scores[:, 1] > 0.1)[0]
            fg_boxes = boxes[above_thresh, :]
            fg_scores = scores[above_thresh, 1]
            print(len(fg_boxes))
            fg_dets = np.hstack(
                (fg_boxes, fg_scores[:, np.newaxis])
            ).astype(np.float32, copy=False)
            fg_dets = fg_dets[nms(fg_dets, 0.7), :]
            print(len(fg_dets))
            print(fg_dets[0][4])

            if len(fg_dets) < 5:
                continue

            #Hardcode cat ID
            category_id = 1

            # NOTE(review): fileName is not assigned in this function; it
            # has to exist at module scope.
            for det in fg_dets[:5]:
                # score first, then the four box coordinates
                detection = [det[4], det[0], det[1], det[2], det[3]]

                writeForPASCALVOC("Object-Detection-Metrics/detections/",
                                  fileName, str(category_id), detection)
                writeForPASCALVOC("Object-Detection-Metrics/groundtruths/",
                                  fileName, str(category_id), gt)

            numOfImages += 1
            print("Number evaluated: " + str(numOfImages))

            if numOfImages >= numToEval:
                break

        except Exception as e:
            # a failing sample is reported and the next one is tried
            print(e)