# Imports as used in pandas' own test suite
import numpy as np
from pandas._libs import hashtable as ht
import pandas._testing as tm


def test_unique_label_indices():
    a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)

    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1]

    tm.assert_numpy_array_equal(left, right, check_dtype=False)

    a[np.random.choice(len(a), 10)] = -1
    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1][1:]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool):
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8")
        shape = np.asarray(shape, dtype="i8") + lift

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() else [
            x - y for x, y in zip(out, lift)
        ]

    # TODO: unique_label_indices only used here, should take ndarray[np.intp]
    i = unique_label_indices(ensure_int64(comp_ids))
    i8copy = lambda a: a.astype("i8", subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
def decons_obs_group_ids(
    comp_ids: npt.NDArray[np.intp],
    obs_ids: npt.NDArray[np.intp],
    shape: Shape,
    labels: Sequence[npt.NDArray[np.signedinteger]],
    xnull: bool,
) -> list[npt.NDArray[np.intp]]:
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    obs_ids : np.ndarray[np.intp]
    shape : tuple[int]
    labels : Sequence[np.ndarray[np.signedinteger]]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp)
        arr_shape = np.asarray(shape, dtype=np.intp) + lift
        shape = tuple(arr_shape)

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = _decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() else [
            x - y for x, y in zip(out, lift)
        ]

    indexer = unique_label_indices(comp_ids)
    return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]
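# A minimal round-trip sketch for decons_obs_group_ids. It assumes the private
# pandas.core.sorting helpers get_group_index and compress_group_index; these
# are internal APIs, so exact names and signatures may vary across pandas
# versions.
import numpy as np
from pandas.core.sorting import (
    compress_group_index,
    decons_obs_group_ids,
    get_group_index,
)

labels = [
    np.array([0, 1, 1, -1, 0], dtype=np.intp),
    np.array([2, 0, 2, 1, 1], dtype=np.intp),
]
shape = (2, 3)

# Encode the label arrays into one flat group index, then compress it down to
# the group ids actually observed.
group_index = get_group_index(labels, shape, sort=True, xnull=False)
comp_ids, obs_ids = compress_group_index(group_index, sort=True)

# decons_obs_group_ids returns one code per *observed group* and level;
# mapping back through comp_ids should recover the row-wise labels, -1
# included (xnull=False lifts and un-lifts the null codes).
reconstructed = decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull=False)
for original, per_group in zip(labels, reconstructed):
    assert (per_group[comp_ids] == original).all()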
def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
    """
    reconstruct labels from observed group ids

    Parameters
    ----------
    xnull: boolean,
        if nulls are excluded; i.e. -1 labels are passed through
    """
    if not xnull:
        lift = np.fromiter(((a == -1).any() for a in labels), dtype='i8')
        shape = np.asarray(shape, dtype='i8') + lift

    if not is_int64_overflow_possible(shape):
        # obs ids are deconstructable! take the fast route!
        out = decons_group_index(obs_ids, shape)
        return out if xnull or not lift.any() \
            else [x - y for x, y in zip(out, lift)]

    i = unique_label_indices(comp_ids)
    i8copy = lambda a: a.astype('i8', subok=False, copy=True)
    return [i8copy(lab[i]) for lab in labels]
def test_unique_label_indices_intp(writable):
    keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)
    keys.flags.writeable = writable
    result = ht.unique_label_indices(keys)
    expected = np.array([0, 1, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(result, expected)
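# A small equivalence sketch for unique_label_indices: per the tests above it
# matches np.unique(labels, return_index=True)[1], except that -1 labels are
# skipped. pandas._libs.hashtable is a private module, so the import path may
# differ across pandas versions.
import numpy as np
from pandas._libs.hashtable import unique_label_indices

labels = np.array([3, 1, 3, -1, 1, 2], dtype=np.intp)

# First-occurrence index of each distinct non-negative label, ordered by
# label value: 1 -> 1, 2 -> 5, 3 -> 0
indexer = unique_label_indices(labels)
expected = np.unique(labels, return_index=True)[1][1:]  # drop the -1 entry

assert (indexer == expected).all()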
import numpy as np
from pandas._libs.hashtable import unique_label_indices


def get_map_for_class(zipped_data_arr,
                      min_ious=np.linspace(.50, 0.95, 10, endpoint=True),
                      avg_recalls=np.linspace(0.00, 1.00, 101, endpoint=True),
                      nms_iou=.7):
    """
    Compute average precision for a single class.

    zipped_data_arr yields (ground_truth_boxes, detections) pairs per image,
    with boxes given as [x_min, x_max, y_min, y_max] and each detection
    carrying a trailing confidence score. If avg_recalls is None, the area
    under the P-R curve is returned (PASCAL VOC 2012 style); otherwise
    precision is averaged at the given recall points.
    """
    # linspace is used over arange for min_ious/avg_recalls because arange
    # has floating-point issues at the endpoints
    all_confs = []
    all_correct_preds = []
    num_total_detections = 0
    num_total_gtruths = 0
    for ground_arr, detector_arr in zipped_data_arr:
        num_gtruths = len(ground_arr)
        if not detector_arr:
            num_total_gtruths += num_gtruths
            continue
        detector_arr = np.asarray(detector_arr, dtype=np.float64)
        # Sort by descending confidence, use mergesort to match COCO evaluation
        detector_arr = detector_arr[
            detector_arr[:, -1].argsort(kind='mergesort')[::-1]]
        det_x_min, det_x_max, det_y_min, det_y_max, confs = \
            detector_arr.transpose()

        if nms_iou is not None:
            # Non-maximum suppression
            all_indices_to_keep = []
            cur_indices_to_keep = np.arange(len(detector_arr))
            # Repeat until no detections are left below the overlap threshold
            while cur_indices_to_keep.size > 1:
                # Keep the most confident remaining detection
                all_indices_to_keep.append(cur_indices_to_keep[0])
                cur_x_min = det_x_min[cur_indices_to_keep]
                cur_x_max = det_x_max[cur_indices_to_keep]
                cur_y_min = det_y_min[cur_indices_to_keep]
                cur_y_max = det_y_max[cur_indices_to_keep]
                intersect_widths = (
                    np.minimum(cur_x_max[0], cur_x_max[1:]) -
                    np.maximum(cur_x_min[0], cur_x_min[1:])).clip(min=0)
                intersect_heights = (
                    np.minimum(cur_y_max[0], cur_y_max[1:]) -
                    np.maximum(cur_y_min[0], cur_y_min[1:])).clip(min=0)
                intersect_areas = intersect_widths * intersect_heights
                # Inclusion-exclusion principle!
                union_areas = (
                    (cur_x_max[0] - cur_x_min[0]) *
                    (cur_y_max[0] - cur_y_min[0]) +
                    (cur_x_max[1:] - cur_x_min[1:]) *
                    (cur_y_max[1:] - cur_y_min[1:])) - intersect_areas
                # Guard against zero-area boxes
                cur_ious = np.divide(intersect_areas, union_areas,
                                     out=union_areas,
                                     where=union_areas != 0)
                # Drop the box just kept, then keep only the remaining boxes
                # whose IOU with it is below the NMS threshold
                cur_indices_to_keep = cur_indices_to_keep[1:]
                cur_indices_to_keep = np.intersect1d(
                    cur_indices_to_keep,
                    cur_indices_to_keep[np.nonzero(cur_ious < nms_iou)[0]],
                    assume_unique=True)
            if cur_indices_to_keep.size == 1:
                all_indices_to_keep.append(cur_indices_to_keep[0])
            detector_arr = detector_arr[np.asarray(all_indices_to_keep)]
            det_x_min, det_x_max, det_y_min, det_y_max, confs = \
                detector_arr.transpose()

        num_detections = len(detector_arr)
        if not ground_arr:
            num_total_detections += num_detections
            all_confs.append(confs)
            continue
        ground_arr = np.asarray(ground_arr, dtype=np.float64)
        ground_x_min, ground_x_max, ground_y_min, ground_y_max = \
            ground_arr.transpose()
        # Clip negative widths/heights since negative implies no overlap
        intersect_widths = (
            np.minimum(det_x_max[:, np.newaxis], ground_x_max) -
            np.maximum(det_x_min[:, np.newaxis], ground_x_min)).clip(min=0)
        intersect_heights = (
            np.minimum(det_y_max[:, np.newaxis], ground_y_max) -
            np.maximum(det_y_min[:, np.newaxis], ground_y_min)).clip(min=0)
        intersect_areas = intersect_widths * intersect_heights
        # Inclusion-exclusion principle!
        union_areas = ((det_x_max - det_x_min) *
                       (det_y_max - det_y_min))[:, np.newaxis] + (
                           (ground_x_max - ground_x_min) *
                           (ground_y_max - ground_y_min)) - intersect_areas
        # Guard against zero-area ground truths
        iou = np.divide(intersect_areas, union_areas, out=union_areas,
                        where=union_areas != 0)
        # The best ground truth for a prediction is the one with the highest
        # IOU. best_gtruths has size num_detections, where best_gtruths[i] is
        # the index of the ground truth to which prediction i is most similar
        # (highest IOU).
        best_gtruths = np.argmax(iou, axis=1)
        # valid_preds is a lazy map of np.nonzero outputs. Each element
        # corresponds to a min_iou in the min_ious array and is a tuple whose
        # first entry holds the indices of the predictions whose
        # prediction-ground truth pairs have IOU greater than that min_iou.
        valid_preds = map(
            np.nonzero,
            iou[np.arange(num_detections), best_gtruths] >
            min_ious[:, np.newaxis])
        # ## Useful for standard precision/recall metrics
        # num_true_positives = np.count_nonzero(np.bincount(best_gtruths[valid_preds]))
        # num_false_positives = num_detections - detected_gtruths
        # num_false_negatives = num_gtruths - detected_gtruths
        #
        # best_gtruths[valid_pred[0]] returns the ground-truth index for each
        # prediction whose ground truth-prediction IOU was greater than
        # min_iou. Then unique_label_indices finds the leftmost occurring
        # position of each distinct ground-truth index, which corresponds to
        # finding the true positives (only the highest-confidence prediction
        # for each ground truth counts as a true positive; the rest are false
        # positives).
        # Note that pandas' unique_label_indices is equivalent to
        # np.unique(labels, return_index=True)[1] but is considerably faster
        # because it uses a hashtable instead of sorting.
        # Once the indices of the true positive predictions are found in the
        # smaller array containing only predictions with IOU > min_iou, they
        # are converted back into indices for the original array using
        # valid_pred.
        correct_preds = [
            valid_pred[0][unique_label_indices(best_gtruths[valid_pred[0]])] +
            num_total_detections for valid_pred in valid_preds
        ]
        all_correct_preds.append(correct_preds)
        all_confs.append(confs)
        num_total_detections += num_detections
        num_total_gtruths += num_gtruths

    # Edge case of no predictions for a class
    if not all_confs:
        return 0

    # Concatenate all predictions and confidences together to find class MAP
    all_confs = np.concatenate(all_confs)
    all_correct_preds = [
        np.concatenate(cur_pred) for cur_pred in zip(*all_correct_preds)
    ]

    # Set only correct prediction indices to True, rest to False
    true_positives = np.zeros((len(min_ious), num_total_detections),
                              dtype=bool)
    for iou_index, positive_locs in enumerate(all_correct_preds):
        true_positives[iou_index][positive_locs] = True
    # Mergesort is chosen to be consistent with COCO/MATLAB results
    sort_order = all_confs.argsort(kind='mergesort')[::-1]
    true_positives = true_positives[:, sort_order]
    # Number of true positives seen up to each point in the ranking
    all_true_positives = np.cumsum(true_positives, axis=1)

    # PASCAL VOC 2012
    if avg_recalls is None:
        # Zero pad both sides to calculate area under curve
        precision = np.zeros((len(min_ious), num_total_detections + 2),
                             dtype=np.float64)
        # Pad one side with zeros and the other with ones for area under curve
        recall = np.zeros((len(min_ious), num_total_detections + 2),
                          dtype=np.float64)
        recall[:, -1] = np.ones(len(min_ious), dtype=np.float64)
        # In python >=3 this is equivalent to np.true_divide
        precision[:, 1:-1] = all_true_positives / np.arange(
            1, num_total_detections + 1)
        # Make each element in the precision list the max of all elements to
        # its right (ignores endpoints)
        precision[:, 1:-1] = np.maximum.accumulate(
            precision[:, -2:0:-1], axis=1)[:, ::-1]
        recall[:, 1:-1] = all_true_positives / num_total_gtruths
        # Calculate area under the P-R curve for each IOU
        # Should only be one IOU at .5 for PASCAL
        all_areas = []
        for cur_recall, cur_precision in zip(recall, precision):
            # Find indices where the value of recall changes
            change_points = np.nonzero(cur_recall[1:] != cur_recall[:-1])[0]
            # Calculate the sum of dw * dh as the area and append it
            all_areas.append(
                np.sum((cur_recall[change_points + 1] -
                        cur_recall[change_points]) *
                       cur_precision[change_points + 1]))
        return np.mean(all_areas)
    # PASCAL VOC 2007
    else:
        # The extra zero deals with a recall larger than the model achieves
        precision = np.zeros((len(min_ious), num_total_detections + 1),
                             dtype=np.float64)
        # In python >=3 this is equivalent to np.true_divide
        precision[:, :-1] = all_true_positives / np.arange(
            1, num_total_detections + 1)
        # Make each element in the precision list the max of all elements to
        # its right (the extra zero at the right doesn't matter)
        precision = np.maximum.accumulate(precision[:, ::-1],
                                          axis=1)[:, ::-1]
        recall = all_true_positives / num_total_gtruths
        # For each recall, find the leftmost index (i.e. largest precision)
        # greater than it
        indices_to_average = np.apply_along_axis(np.searchsorted, 1, recall,
                                                 avg_recalls)
        # Find the matching largest precision for each recall as an array
        precs_to_average = precision[np.arange(len(precision))[:, np.newaxis],
                                     indices_to_average]
        # Return average precision over each recall and over each IOU. An
        # axis can be specified if a separate average precision is wanted for
        # each IOU (e.g. to do more precise statistics)
        return np.mean(precs_to_average)
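# A toy invocation sketch for get_map_for_class. The data here is
# hypothetical; the box layout [x_min, x_max, y_min, y_max] (plus a trailing
# confidence for detections) is inferred from the transpose/unpacking above.
ground_truths = [
    [[0.0, 10.0, 0.0, 10.0]],                # image 1: one ground-truth box
    [[5.0, 15.0, 5.0, 15.0]],                # image 2: one ground-truth box
]
detections = [
    [[0.0, 9.0, 0.0, 9.0, 0.9],              # image 1: good hit
     [20.0, 30.0, 20.0, 30.0, 0.4]],         # image 1: false positive
    [[5.0, 14.0, 5.0, 14.0, 0.8]],           # image 2: good hit
]

# COCO-style: average over IOUs 0.50:0.95 and 101 recall points (the defaults)
print(get_map_for_class(zip(ground_truths, detections)))

# PASCAL VOC 2012-style: area under the P-R curve at a single IOU of 0.5
print(get_map_for_class(zip(ground_truths, detections),
                        min_ious=np.array([0.5]), avg_recalls=None))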