示例#1
0
def get_pos_negs_all_v2(dbidxs, label_db: LabelDB, vec_meta: pd.DataFrame):
    """Split the vectors of the given images into positives and negatives.

    For every image id in ``dbidxs`` the boxes of its vectors (the rows of
    ``vec_meta`` with that ``dbidx``) are compared with the image's label
    boxes through ``box_iou``; the result is indexed as
    ``ious[label_box, vector_box]``:

    * a vector is a negative when the sum of its IoUs over all label boxes
      is exactly 0;
    * for each label box, the vector with the highest IoU is a positive,
      provided that IoU is greater than 0.

    Args:
        dbidxs: re-iterable collection of image ids (it is traversed twice).
        label_db: label store; ``label_db.get(idx, format="df")`` supplies the
            label boxes of image ``idx``.
        vec_meta: per-vector metadata with a ``dbidx`` column.  The returned
            bitmaps hold values of its index.

    Returns:
        ``(posidxs, negidxs)``: two ``pr.BitMap`` of ``vec_meta`` index values.
        Both are empty when ``dbidxs`` is empty.
    """
    # Restrict once to the rows of the requested images so that the per-image
    # filter below scans a smaller frame.
    relvecs = vec_meta[vec_meta.dbidx.isin(pr.BitMap(dbidxs))]

    pos = []
    neg = []
    for idx in dbidxs:
        acc_vecs = relvecs[relvecs.dbidx == idx]
        acc_boxes = get_boxes(acc_vecs)
        label_boxes = label_db.get(idx, format="df")
        ious = box_iou(label_boxes, acc_boxes)

        # Column sum == 0: the vector box overlaps no label box at all.
        negatives = ious.sum(axis=0) == 0
        negvec_positions = acc_vecs.index[negatives].values

        # Best-overlapping vector for each label box.
        max_ious_id = np.argmax(ious, axis=1)
        max_ious = np.max(ious, axis=1)

        # Some label boxes have size 0 because of a bug in the data, so a
        # label box may legitimately end up without any positive vector.
        pos_idxs = pr.BitMap(max_ious_id[max_ious > 0])

        posvec_positions = acc_vecs.index[pos_idxs].values
        pos.append(posvec_positions)
        neg.append(negvec_positions)

    if not pos:
        # np.concatenate refuses an empty list of arrays.
        return pr.BitMap(), pr.BitMap()

    posidxs = pr.BitMap(np.concatenate(pos))
    negidxs = pr.BitMap(np.concatenate(neg))
    return posidxs, negidxs
示例#2
0
def get_pos_negs(box, vec_meta):
    """Pick positive and negative image chunks for a set of boxes.

    Args:
        box: dataframe of boxes (one row per box).
        vec_meta: metadata of the image-chunk vectors, with grid coordinates
            in the ``iis`` / ``jjs`` columns.

    Returns:
        ``(pos_idxs, neg_idxs)`` as ``pr.BitMap`` of ``vec_meta`` index values:
        positives are the chunks at the grid cell nearest to a box (as given
        by ``nearest_ij``); negatives are the chunks that overlap none of the
        boxes.  With no boxes, every chunk is a negative.
    """
    if box.shape[0] == 0:
        everything = pr.BitMap(vec_meta.index.values)
        return pr.BitMap(), everything

    box_spans = box2ij(box, base_size=224)
    box_centers = nearest_ij(box, base_size=224)

    outside_sets = []
    center_sets = []
    for span, center in zip(box_spans.itertuples(), box_centers.itertuples()):
        # Grid cells covered by this box: rows i1..i2-1 and columns j1..j2-1.
        row_hit = vec_meta.iis.between(span.i1, span.i2 - 1)
        col_hit = vec_meta.jjs.between(span.j1, span.j2 - 1)
        covered = row_hit & col_hit
        outside_sets.append(pr.BitMap(vec_meta[~covered].index))

        at_center = (vec_meta.iis == center.i) & (vec_meta.jjs == center.j)
        center_sets.append(pr.BitMap(vec_meta[at_center].index))

    # A negative must miss every box; a positive needs to hit any one center.
    neg_idxs = pr.BitMap.intersection(*outside_sets)
    pos_idxs = pr.BitMap.union(*center_sets)
    return pos_idxs, neg_idxs
示例#3
0
def hard_neg_tuples(v, Xt, yt, max_tups):
    """Return indices for the ``max_tups`` hardest (positive, negative) pairs.

    Every row of ``Xt`` is scored as ``Xt @ v``.  A pair is "hard" when the
    positive's score minus the negative's score is small, so the pairs with
    the smallest differences are kept.

    Args:
        v: weight vector (reshaped to a column before use).
        Xt: 2-D array of feature rows.
        yt: label array; rows with ``yt > 0`` are positives, rows with
            ``yt < 1`` are negatives.
        max_tups: maximum number of pairs to return.

    Returns:
        ``(ridx, piis, pjjs)`` where ``ridx`` holds the distinct rows of ``Xt``
        that take part in a selected pair and ``piis`` / ``pjjs`` are positions
        into ``ridx`` of the positive and negative member of each pair, i.e.
        ``Xt[ridx][piis]`` and ``Xt[ridx][pjjs]`` are the paired rows.

    Raises:
        AssertionError: if there is no positive or no negative row.
    """
    p = np.where(yt > 0)[0]
    n = np.where(yt < 1)[0]
    assert p.shape[0] > 0
    assert n.shape[0] > 0

    scores = Xt @ v.reshape(-1, 1)
    # score_diffs[a, b] = score of positive a minus score of negative b
    score_diffs = scores[p].reshape(-1, 1) - scores[n].reshape(1, -1)
    diff_order = np.argsort(score_diffs, axis=None)[:max_tups]

    # Decode the flat positions directly instead of materialising two full
    # (positives x negatives) index grids with meshgrid.
    p_rank, n_rank = np.unravel_index(diff_order, score_diffs.shape)
    pps = p[p_rank]
    nns = n[n_rank]

    ridx = np.array(pr.BitMap(pps).union(pr.BitMap(nns)))
    # Maps a row of Xt to its position inside ridx (-1 for rows not selected).
    lookup_tab = np.zeros(Xt.shape[0], dtype="int") - 1
    lookup_tab[ridx] = np.arange(ridx.shape[0], dtype="int")
    piis = lookup_tab[pps]
    pjjs = lookup_tab[nns]
    # Sanity check of the remapping: ridx[piis] must give back the positives.
    assert (ridx[piis] == pps).all()
    return ridx, piis, pjjs
示例#4
0
    def query(self,
              *,
              topk,
              mode,
              vector=None,
              exclude=None,
              startk=None,
              **kwargs):
        """Return the ``topk`` best-scoring images that are not excluded.

        Args:
            topk: number of images wanted; reduced to the number of available
                images when fewer remain.
            mode: must be ``"dot"`` (dot-product scoring).
            vector: query vector.  When ``None`` the scores are random.
            exclude: ``pr.BitMap`` of ``dbidx`` values to leave out.
            startk: accepted but not used by this implementation.
            **kwargs: accepted but not used by this implementation.

        Returns:
            A dict with
              * ``"dbidxs"``: array of the selected image ids, best first;
              * ``"nextstartk"``: ``len(exclude)`` plus the number returned;
              * ``"activations"``: one single-row DataFrame per image holding
                the box ``(0, 0, 224, 224)``, the ``dbidx`` and the score.

            NOTE(review): when nothing is left to return, a pair of empty
            arrays is returned instead of a dict.  Kept as is because callers
            are not visible from here -- confirm and unify.
        """
        if exclude is None:
            exclude = pr.BitMap([])
        included = pr.BitMap(self.all_indices).difference(exclude)
        if len(included) == 0:
            return np.array([]), np.array([])

        if len(included) <= topk:
            topk = len(included)

        assert mode == "dot"

        metas = self.vector_meta.dbidx.isin(included)
        vecs = self.vectors[metas]
        # Image id of every selected row, in the same order as ``vecs``.
        row_dbidxs = self.vector_meta.dbidx[metas].values

        if vector is None:
            scores = np.random.randn(vecs.shape[0])
        else:
            scores = vecs @ vector.reshape(-1)

        maxpos = np.argsort(-scores)[:topk]
        # Read the ids from the rows that were actually scored.  Indexing
        # np.array(included) by row position is only correct when the selected
        # metadata rows are sorted by dbidx with exactly one row per image.
        dbidxs = row_dbidxs[maxpos]
        scores = scores[maxpos]

        ret = dbidxs
        assert ret.shape[0] == scores.shape[0]
        sret = pr.BitMap(ret)
        assert len(sret) == ret.shape[0]  # no repeats
        assert ret.shape[
            0] == topk  # return quantity asked, in theory could be less
        assert sret.intersection_cardinality(
            exclude) == 0  # honor exclude request

        def make_acc(sc, dbidx):
            # One whole-image activation box per returned image.
            return pd.DataFrame.from_records(
                [dict(x1=0, y1=0, x2=224, y2=224, dbidx=dbidx, score=sc)])

        return {
            "dbidxs":
            ret,
            "nextstartk":
            len(exclude) + ret.shape[0],
            "activations":
            [make_acc(sc, dbidx) for (sc, dbidx) in zip(scores, ret)],
        }
示例#5
0
 def __init__(self, index: AccessMethod):
     """Start a fresh query over ``index`` with empty bookkeeping."""
     self.index = index
     # images returned from index (not necessarily seen yet)
     self.returned = pr.BitMap()
     self.label_db = LabelDB()
     self.startk = 0
示例#6
0
    def query(self, *, topk, vector, exclude=None, startk=None, **kwargs):
        """Rank non-excluded images by the best score among their vectors.

        Each row of ``self.vector_meta`` carries a vector in its ``vectors``
        column; a row's score is the dot product of that vector with
        ``vector``.  An image's score is the maximum over its rows.

        Args:
            topk: number of images wanted; reduced to the number of available
                images when fewer remain.
            vector: query vector (flattened before use).
            exclude: ``pr.BitMap`` of ``dbidx`` values to leave out.
            startk: accepted but not used by this implementation.
            **kwargs: accepted but not used by this implementation.

        Returns:
            A dict with
              * ``"dbidxs"``: int array of the selected image ids, best first;
              * ``"nextstartk"``: the constant 100 (placeholder);
              * ``"activations"``: per selected image, a DataFrame with the
                highest-scoring row(s) and the columns ``x1, y1, x2, y2,
                dbidx, score, filename``.

            NOTE(review): when nothing is left to return, a pair of empty
            arrays is returned instead of a dict.  Kept as is because callers
            are not visible from here -- confirm and unify.
        """
        if exclude is None:
            exclude = pr.BitMap([])
        included = pr.BitMap(self.all_indices).difference(exclude)
        if len(included) == 0:
            return np.array([]), np.array([])

        if len(included) <= topk:
            topk = len(included)

        fullmeta = self.vector_meta[self.vector_meta.dbidx.isin(included)]
        query_vec = vector.reshape(-1)

        # Collected per group actually present in the metadata.  Pre-sizing by
        # len(included) left phantom zero entries (dbidx 0, score 0) whenever
        # an included image had no rows, and those could be ranked into the
        # result and index past the end of ``activations``.
        frame_ids = []
        frame_scores = []
        activations = []
        for dbidx, frame_vec_meta in fullmeta.groupby("dbidx"):
            frame_vectors = frame_vec_meta.vectors.values
            boxscs = np.zeros(frame_vec_meta.shape[0])
            for j, image_vector in enumerate(frame_vectors):
                boxscs[j] = image_vector @ query_vec

            frame_activations = frame_vec_meta.assign(score=boxscs)
            # Keep only the best-scoring box(es) of this image.
            frame_activations = frame_activations[
                frame_activations.score == frame_activations.score.max()][[
                    "x1", "y1", "x2", "y2", "dbidx", "score", "filename"
                ]]
            activations.append(frame_activations)
            frame_ids.append(dbidx)
            frame_scores.append(np.max(boxscs))

        dbidxs = np.asarray(frame_ids)
        dbscores = np.asarray(frame_scores, dtype=float)
        topkidx = np.argsort(-dbscores)[:topk]

        return {
            "dbidxs": dbidxs[topkidx].astype("int"),
            "nextstartk": 100,  #nextstartk,
            "activations": [activations[idx] for idx in topkidx],
        }
示例#7
0
 def __init__(
     self,
     gdm: GlobalDataManager,
     dataset: SeesawDatasetManager,
     hdb: AccessMethod,
     params: SessionParams,
 ):
     """Create a session over ``dataset`` that is served by the index ``hdb``.

     Stores the collaborators, resets all bookkeeping to empty, opens a new
     query on ``hdb``, builds the ``SeesawLoop`` around it and logs "init".
     """
     # collaborators
     self.gdm = gdm
     self.dataset = dataset
     self.params = params
     self.index = hdb

     # per-session bookkeeping, all empty at start
     self.acc_indices = []
     self.acc_activations = []
     self.seen = pr.BitMap([])
     self.accepted = pr.BitMap([])
     self.init_q = None
     self.timing = []
     self.image_timing = {}
     self.action_log = []

     # the query has to exist before the loop that drives it
     self.q = hdb.new_query()
     self.loop = SeesawLoop(self.gdm, self.q, params=self.params)

     self._log("init")
示例#8
0
    def __init__(self, filepath, mode='r'):
        """Load a serialized bitmap from ``filepath`` and keep the file open.

        Args:
            filepath: path of the file holding the serialized bitmap.
            mode: ``'r'`` opens the file read-only (``'rb'``); ``'rw'`` opens
                it for update (``'r+b'``) and first creates it with an empty
                bitmap when it does not exist yet.

        Raises:
            ValueError: if ``mode`` is neither ``'r'`` nor ``'rw'``.

        After construction ``self.map`` holds the deserialized bitmap and
        ``self._fp`` the open file, positioned at offset 0.
        """
        self._mode = mode
        try:
            fmode = {'r': 'rb', 'rw': 'r+b'}[self._mode]
        except KeyError:
            raise ValueError('invalid mode')

        if (not os.path.isfile(filepath)) and self._mode == 'rw':
            # Bootstrap a new file so that the 'r+b' open below succeeds.
            with open(filepath, 'wb') as fp:
                b = roaring.BitMap()
                fp.write(b.serialize())

        self._fp = open(filepath, fmode)
        try:
            buff = self._fp.read()
            self._fp.seek(0)
            self.map = roaring.BitMap.deserialize(buff)
        except BaseException:
            # Do not leak the handle when the content cannot be read or is
            # not a valid serialized bitmap.
            self._fp.close()
            raise
示例#9
0
def get_metric_summary(res: BenchResult):
    """Summarise a benchmark result as a plain dict.

    The images of ``res.session.gdata`` are numbered consecutively in
    traversal order (batch by batch); ``hit_indices`` lists the numbers of
    those for which ``is_image_accepted`` is true and ``nseen`` is the total
    number of images traversed.  ``nimages``, ``ntotal`` and ``total_time``
    are copied from ``res``.
    """
    shown = [imdata for ent in res.session.gdata for imdata in ent]
    hit_indices = [
        position for position, imdata in enumerate(shown)
        if is_image_accepted(imdata)
    ]

    index_set = pr.BitMap(hit_indices)
    assert len(index_set) == len(hit_indices)

    return {
        "hit_indices": np.array(index_set),
        "nseen": len(shown),
        "nimages": res.nimages,
        "ntotal": res.ntotal,
        "total_time": res.total_time,
    }
示例#10
0
def restrict_fine_grained(vec_meta, vec, indxs):
    """Keep only the vectors of the images in ``indxs`` and renumber them.

    Args:
        vec_meta: DataFrame with one row per vector and a ``dbidx`` column.
        vec: array of vectors, row-aligned with ``vec_meta``.
        indxs: strictly increasing numpy array of the ``dbidx`` values to keep.

    Returns:
        ``(vec_meta, vec)`` restricted to the requested images, where
        ``dbidx`` is replaced by the position of the old value inside
        ``indxs`` and the frame index is reset.

        NOTE(review): when every row is selected the inputs are returned
        untouched, i.e. without renumbering ``dbidx`` -- presumably callers
        only hit that path when ``indxs`` already is ``0..n-1``; confirm.

    Raises:
        AssertionError: if the inputs are misaligned, ``indxs`` is not sorted,
            or some requested index has no embedding in ``vec_meta``.
    """
    assert vec_meta.shape[0] == vec.shape[0]
    assert (indxs[1:] > indxs[:-1]).all(), "must be sorted"
    # isin takes the array as is; no need to build a bitmap first.
    mask = vec_meta.dbidx.isin(indxs)
    if mask.all():
        return vec_meta, vec

    vec_meta = vec_meta[mask]
    vec = vec[mask.values]

    # Size the table from the requested indices rather than from the rows that
    # survived the filter: a requested index without any embedding would
    # otherwise overflow the table (IndexError) before the descriptive
    # assertion below gets a chance to report it.
    table_size = int(indxs.max()) + 1 if indxs.shape[0] > 0 else 0
    lookup_table = np.full(table_size, -1, dtype="int")
    lookup_table[indxs] = np.arange(indxs.shape[0], dtype="int")
    new_dbidx = lookup_table[vec_meta.dbidx.values]
    assert (new_dbidx >= 0).all()
    vec_meta = vec_meta.assign(
        dbidx=new_dbidx)  # this line shows up in profiler
    assert (
        vec_meta.dbidx.unique().shape[0] == indxs.shape[0]
    ), "missing fine-grained embedding for some of the indices requested"
    assert vec_meta.shape[0] == vec.shape[0]
    return vec_meta.reset_index(drop=True), vec
示例#11
0
        def get_nns(startk, topk):
            """Query the vector index until enough new images are found.

            Asks ``self.vec_index`` for ``startk + deltak`` neighbours of
            ``vector``, doubling ``deltak`` (initially ``topk * 100``) until
            the hits cover at least ``topk`` images outside ``exclude``, or
            until the index returns fewer hits than requested.

            Returns the ``(vec_idxs, scores)`` pair of the last query, which
            may cover fewer than ``topk`` new images when the index ran out.
            """
            attempt = 0
            deltak = topk * 100
            while True:
                if attempt > 1:
                    print(
                        "warning, we are looping too much. adjust initial params?"
                    )

                requested = startk + deltak
                vec_idxs, scores = self.vec_index.query(vector,
                                                        top_k=requested)
                found_idxs = pr.BitMap(vec_meta.dbidx.values[vec_idxs])

                newidxs = found_idxs.difference(exclude)
                if len(newidxs) >= topk:
                    break

                if len(vec_idxs) < requested:
                    # The index handed back less than was asked for, so a
                    # larger request cannot find more; stop instead of
                    # doubling forever.
                    break

                deltak = deltak * 2
                attempt += 1

            return vec_idxs, scores
示例#12
0
 def get_seen(self):
     """Return the keys of ``self.ldata`` as a ``pr.BitMap``."""
     seen_keys = self.ldata.keys()
     return pr.BitMap(seen_keys)
示例#13
0
    def _query_prelim(self,
                      *,
                      vector,
                      topk,
                      zoom_level,
                      exclude=None,
                      startk=None):
        """Find the best-scoring vectors that belong to non-excluded images.

        Vectors are retrieved either from ``self.vec_index`` (when set) or by
        an exact dot product against ``self.vectors``.  Images are ranked by
        the maximum score among their retrieved vectors, and the score of the
        ``topk``-th image is used as cut-off.

        Args:
            vector: query vector.
            topk: number of distinct images (``dbidx`` values) wanted; reduced
                when fewer are available.
            zoom_level: accepted but not used in this method.
            exclude: ``pr.BitMap`` of ``dbidx`` values to leave out.
            startk: baseline number of neighbours requested from the index and
                input of the ``nextstartk`` estimate; ``None`` means 0.

        Returns:
            ``(row_labels, candidates, allscores, nextstartk)`` where
              * ``row_labels``: ``vector_meta`` index labels of the vectors of
                non-excluded images scoring at least the cut-off;
              * ``candidates``: ``pr.BitMap`` of the ``dbidx`` of those rows;
              * ``allscores``: DataFrame (``dbidx``, ``score``) of every
                retrieved vector, excluded images included;
              * ``nextstartk``: ``ceil(0.8 * startk + 0.2 * m)`` with ``m`` the
                number of retrieved vectors at or above the cut-off.

            NOTE(review): when there is nothing to return, the 3-tuple
            ``([], [], [])`` is returned instead of a 4-tuple.  Kept as is
            because callers are not visible from here -- confirm and unify.
        """
        if exclude is None:
            exclude = pr.BitMap([])
        if startk is None:
            # The default used to crash in the arithmetic below.
            startk = 0

        included_dbidx = pr.BitMap(self.all_indices).difference(exclude)
        vec_meta = self.vector_meta

        if len(included_dbidx) == 0:
            print("no dbidx included")
            return [], [], []

        if len(included_dbidx) <= topk:
            topk = len(included_dbidx)

        ## want to return proposals only for images we have not seen yet...
        ## but library does not allow this...
        ## guess how much we need... and check
        def get_nns(startk, topk):
            attempt = 0
            deltak = topk * 100
            while True:
                if attempt > 1:
                    print(
                        "warning, we are looping too much. adjust initial params?"
                    )

                requested = startk + deltak
                vec_idxs, scores = self.vec_index.query(vector,
                                                        top_k=requested)
                found_idxs = pr.BitMap(vec_meta.dbidx.values[vec_idxs])

                newidxs = found_idxs.difference(exclude)
                if len(newidxs) >= topk:
                    break

                if len(vec_idxs) < requested:
                    # The index handed back less than was asked for, so a
                    # larger request cannot find more; stop instead of
                    # doubling forever.
                    break

                deltak = deltak * 2
                attempt += 1

            return vec_idxs, scores

        def get_nns_by_vector_exact():
            scores = self.vectors @ vector.reshape(-1)
            vec_idxs = np.argsort(-scores)
            return vec_idxs, scores[vec_idxs]

        if self.vec_index is not None:
            idxs, scores = get_nns(startk, topk)
        else:
            idxs, scores = get_nns_by_vector_exact()

        # work only with the two columns here bc dataframe can be large
        topscores = vec_meta[["dbidx"]].iloc[idxs]
        topscores = topscores.assign(score=scores)
        allscores = topscores

        newtopscores = topscores[~topscores.dbidx.isin(exclude)]
        scoresbydbidx = (newtopscores.groupby("dbidx").score.max().sort_values(
            ascending=False))

        # Included images are not guaranteed to have retrieved vectors, so
        # fewer than topk images may be available here.
        available = scoresbydbidx.shape[0]
        if available == 0:
            return [], [], []
        topk = min(topk, available)

        score_cutoff = scoresbydbidx.iloc[topk - 1]  # kth largest score
        newtopscores = newtopscores[newtopscores.score >= score_cutoff]

        nextstartk = (allscores.score >= score_cutoff).sum()
        nextstartk = math.ceil(startk * 0.8 +
                               nextstartk * 0.2)  # average to estimate next
        candidates = pr.BitMap(newtopscores.dbidx)
        assert len(candidates) >= topk
        assert candidates.intersection_cardinality(exclude) == 0
        return newtopscores.index.values, candidates, allscores, nextstartk