# Standard imports assumed by the functions below. Project-local helpers
# (safe_create_dir, perturbate, load_features, normalize, get_index_path,
# flann_search, get_detector, get_descriptor, create_flann_index,
# read_array_bin_file, write_array_bin_file, create_index_table,
# load_pf_flann_index, get_db_img_features, count_scores, normalize_scores,
# save_rank, line_geom_consistency, invert_index, check_consistency), as well
# as the module-level `perturbations` list and `verbose` flag, are assumed to
# come from the project's own modules.
import glob
import os
import time

import cv2
import nmslib
import numpy as np
from tqdm import tqdm


def query_perturbations(inputdir, outdir, nptb):

    imgpathlist = glob.glob(inputdir + "*")
    imgpathlist.sort()

    safe_create_dir(outdir)

    count = 0
    for imgpath in imgpathlist:
        img = cv2.imread(imgpath)

        # Input files are expected to be named "<prefix>_<name>.<ext>";
        # the prefix is dropped and replaced by a running counter.
        parts = os.path.basename(imgpath).split("_", 1)
        basename, ext = parts[1].rsplit('.', 1)

        for k in range(nptb):
            outpath = "{0:s}{1:05d}_{2:s}_p{3:03d}.{4:s}".format(outdir, count, basename, k, ext)

            # Draw how many perturbations to chain (1 to 3) and shuffle the
            # candidates; n is assumed to cap the chain (it was unused in the
            # original, which applied every perturbation in the list).
            n = np.random.randint(1, 4, 1)[0]
            np.random.shuffle(perturbations)

            print(". {0:s} -> {1:s}".format(os.path.basename(imgpath), os.path.basename(outpath)))
            print(" |_ ", end="", flush=True)

            # Perturb a fresh copy so the nptb outputs stay independent
            # (assumption: perturbations should not accumulate across outputs).
            pimg = img.copy()
            for pert in perturbations[:n]:
                print(" {0:s};".format(pert), end="", flush=True)
                pimg = perturbate(pimg, pert)
            print("", end="\n")

            cv2.imwrite(outpath, pimg)
            count += 1
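# Usage sketch (hypothetical paths): generate five perturbed variants of each
# query image in "queries/":
#
#     query_perturbations("queries/", "queries_ptb/", nptb=5)
#
# Note that directory arguments are concatenated directly with file names
# throughout this module, so they must end with a path separator.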
def search_index(retcfg):

    q_features = load_features(retcfg['path']['qfeature'])
    q_namelist = np.loadtxt(retcfg['path']['qlist'],
                            dtype=dict(names=('qname', 'nfeat'), formats=('U100', np.int32)))

    assert q_features.shape[0] == np.sum(q_namelist['nfeat']), \
        "Inconsistent sum of feature counts and size of query features array"

    norm = retcfg.get('feature', 'norm', fallback=None)

    db_features = load_features(retcfg['path']['dbfeature'])
    if norm:
        db_features = normalize(db_features, norm)

    # Load a previously saved OpenCV FLANN index over the DB features
    fidx = cv2.flann_Index()
    ifile = get_index_path(retcfg['path']['outdir'], retcfg['DEFAULT']['expname'])
    fidx.load(db_features, ifile)

    outdir = retcfg['path']['outdir'] + "queryfiles/"
    safe_create_dir(outdir)

    search_type = retcfg['search']['search_type']
    knn = retcfg.getint('search', 'knn')
    rfactor = retcfg.getfloat('search', 'radius_factor')

    sidx = 0
    for qname, n in q_namelist:
        qfeat = q_features[sidx:sidx + n]
        if norm:
            qfeat = normalize(qfeat, norm)

        matchfpath = "{0:s}{1:s}.matches".format(outdir, qname)
        distfpath = "{0:s}{1:s}.dist".format(outdir, qname)

        votes, dists, _ = flann_search(qfeat, fidx, stype=search_type, k=knn, f=rfactor, flib="cv")

        print(qname, "-> ", sidx, ":", sidx + n)
        print(votes.shape)
        print(dists.shape, end="\n---\n")

        np.save(matchfpath + ".npy", votes)
        np.save(distfpath + ".npy", dists)

        sidx += n
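# Each query yields two files under "<outdir>queryfiles/": "<qname>.matches.npy"
# with the per-feature neighbour ids and "<qname>.dist.npy" with the matching
# distances. Reading them back (hypothetical query name):
#
#     votes = np.load("out/queryfiles/query0001.matches.npy")
#     dists = np.load("out/queryfiles/query0001.dist.npy")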
def extract_features(inputdir, prefix, detector, descriptor, limit):

    safe_create_dir(os.path.dirname(prefix))

    impathlist = glob.glob(inputdir + "*")
    impathlist.sort()

    feat_per_img = []

    det = get_detector(detector, n=limit)
    des = get_descriptor(descriptor, n=limit)

    previous = None
    for i in tqdm(range(len(impathlist)), ncols=100, desc="Image:", total=len(impathlist)):
        impath = impathlist[i]
        basename = os.path.basename(impath)

        img = cv2.imread(impath)
        kp = det.detect(img, None)
        _, features = des.compute(img, kp)

        try:
            features = features[:limit]
        except TypeError:
            # For corrupted images compute() returns None; fall back to a
            # shuffled copy of the previous image's features. This assumes
            # the first image in the list is readable.
            np.random.shuffle(previous)
            features = previous

        feat_per_img.append((basename, features.shape[0]))

        outfeatfile = "{0:s}_{1:s}_batch{2:06d}.npy".format(prefix, descriptor, i)
        np.save(outfeatfile, features)

        previous = features.copy()

    idx_dtype = dict(names=('name', 'nfeat'), formats=('U100', np.int32))
    outidxfile = "{0:s}_idx.out".format(prefix)
    np.savetxt(outidxfile, np.array(feat_per_img, dtype=idx_dtype), fmt="%-50s %d")

    return
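# Usage sketch (hypothetical arguments; valid detector/descriptor names depend
# on get_detector/get_descriptor): extract up to 500 descriptors per image,
# writing one "<prefix>_<descriptor>_batchNNNNNN.npy" file per image plus a
# "<prefix>_idx.out" index with one "<image name> <n features>" row per image:
#
#     extract_features("images/", "out/db", "SURF", "SURF", limit=500)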
def create_index(retcfg):

    index_type = retcfg['index']['index_type']
    dist_type = retcfg['index']['dist_type']
    lib = retcfg['index']['lib']
    norm = retcfg.get('feature', 'norm', fallback=None)

    safe_create_dir(retcfg['path']['outdir'])

    dbfeatures = load_features(retcfg['path']['dbfeature'])

    outpath = "{0:s}{1:s}_{2:s}_{3:s}.dat".format(retcfg['path']['outdir'],
                                                  retcfg['DEFAULT']['expname'],
                                                  index_type, dist_type)

    if lib == "cv":
        fidx, params = create_flann_index(dbfeatures, index_type, dist_type, norm=norm)
    else:
        raise ValueError("Unsupported flann lib <{0:s}>".format(lib))

    fidx.save(outpath)

    return
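# The retcfg argument behaves like a configparser.ConfigParser. A hypothetical
# config illustrating the sections and keys read by the functions in this
# module (key names come from the code; values are examples only):
#
#     [DEFAULT]
#     expname = exp01
#
#     [path]
#     dbfeature = /data/db_features.npy
#     qfeature = /data/q_features.npy
#     qlist = /data/q_list.txt
#     dblist = /data/db_list.txt
#     outdir = /data/out/
#
#     [feature]
#     norm = l2
#
#     [index]
#     index_type = hnsw
#     dist_type = l2
#     lib = cv
#     M = 20
#     efC = 100
#
#     [search]
#     search_type = knn
#     knn = 10
#     knn_db = 300
#     radius_factor = 1.0
#
#     [rank]
#     score_type = vote
#     limit = 100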
def feature_selection_transform(dbfeatdir, dbindexpath, dbimdir, outdir, sel_crit, limit, prefix):

    minv = 1
    topr = 20
    im_limit = int(np.ceil(0.25 * limit))

    dbfeatfpath = glob.glob(dbfeatdir + "*.bfv")[0]
    dbkeypfpath = glob.glob(dbfeatdir + "*.bkp")[0]
    dbidxfpath = glob.glob(dbfeatdir + "*.txt")[0]

    outfeatfpath = "{0:s}{1:s}.bfv".format(outdir, prefix)
    outkeypfpath = "{0:s}{1:s}.bkp".format(outdir, prefix)
    outidxfpath = "{0:s}{1:s}_indexing.txt".format(outdir, prefix)
    outrankdir = "{0:s}ranks/".format(outdir)
    safe_create_dir(outrankdir)

    selfeat = []
    selkeyp = []
    selidx = []

    if verbose:
        print("Reading DB features and keypoints: ")
    dbfeatures = read_array_bin_file(dbfeatfpath)
    dbkeypoints = read_array_bin_file(dbkeypfpath)
    if verbose:
        print(" -> ", dbfeatfpath, " ", dbfeatures.shape)
        print(" -> ", dbkeypfpath, " ", dbkeypoints.shape, "\n")

    if verbose:
        print("Reading Indexing file: ")
        print(" -> ", dbidxfpath, "\n")
    # 'S100'/'i32' were not valid or portable NumPy format strings;
    # 'U100'/'i4' are assumed here for Python 3 compatibility.
    idxdt = dict(names=('name', 'nfeat', 'topidx'), formats=('U100', 'i4', 'i4'))
    dbfeatindex = np.loadtxt(dbidxfpath, dtype=idxdt)
    nametable = dbfeatindex['name']
    indextable = create_index_table(dbfeatindex['nfeat'])

    if verbose:
        print("Loading FLANN index: ")
        print(" -> ", dbindexpath, "\n")
    flann_index = load_pf_flann_index(dbfeatures, dbindexpath)

    if verbose:
        print(" -- Starting feature selection --")
    for dbimname, nf, ti in dbfeatindex:
        ts_fs = time.time()
        dbimpath = glob.glob(dbimdir + dbimname)[0]
        rankfpath = outrankdir + "rank_" + os.path.splitext(dbimname)[0] + ".rk"
        dbim = cv2.imread(dbimpath)
        imindexes = np.arange(nametable.shape[0])

        if verbose:
            print(" -> Selecting features of: ", dbimname,
                  " ({0:d}, {1:d}) from {2:d}".format(ti - nf, ti, dbfeatures.shape[0]))
            print(" --> Selection criteria: ", sel_crit)
            print(" --> Minimum votes: ", minv)
            print(" --> Rank positions: top ", topr)

        qfeat, qkeyp = get_db_img_features(dbfeatures, dbkeypoints, nf, ti)

        votes, dists, tt = flann_search(qfeat, flann_index, stype='knn', k=10, flib="pf")
        if verbose:
            print(" --> FLANN search time: {0:0.3f}s".format(tt))

        votescores, distscores, matchedfeat = count_scores(indextable, votes, dists)
        if verbose:
            print(" --> Total number of matched images: ", len(matchedfeat), "\n")

        # Keep only images that received at least one vote
        aux = votescores != 0
        votescores = votescores[aux]
        distscores = distscores[aux]
        resnametable = nametable[aux]
        imindexes = imindexes[aux]

        normvotes, _ = normalize_scores(votescores, cvt_sim=False, min_val=0)
        normdists, _ = normalize_scores(distscores, cvt_sim=True, min_val=0)

        save_rank(rankfpath, resnametable, votescores, normvotes, distscores, normdists)

        # Drop the self-match (maximum vote count) and anything below minv
        mv = np.max(votescores)
        idx_choice = np.logical_and(votescores != mv, votescores >= minv)
        resnametable = resnametable[idx_choice]
        votescores = votescores[idx_choice]
        normdists = normdists[idx_choice]
        imindexes = imindexes[idx_choice]

        # zip() returns an iterator in Python 3; materialize it for np.array.
        # 'f32' was not a valid NumPy format string; 'f4' is assumed.
        aux = list(zip(votescores, normdists))
        dt = dict(names=('votes', 'normd'), formats=('f4', 'f4'))
        scores = np.array(aux, dtype=dt)
        ridx = scores.argsort(order=('votes', 'normd'))
        ridx = ridx[::-1]

        selected_idx = []
        total_sel = 0
        for pos, i in enumerate(ridx[:topr]):
            imi = imindexes[i]
            if verbose:
                print(" > Ranked {0:d} (Img {3:d}): {1:s} = {2:f} votes".format(
                    pos, resnametable[i], votescores[i], imi))

            midx = np.array(matchedfeat[imi])
            qidx = midx[:, 0]
            tidx = midx[:, 1]

            if sel_crit == 'base':
                selected_idx.append(qidx)
                total_sel += qidx.shape[0]
                if verbose:
                    print("   `-> {0:d} features selected".format(qidx.shape[0]))

            if sel_crit == 'line':
                qm_keyp = qkeyp[qidx]
                tm_keyp = dbkeypoints[tidx]
                cons, ll, la = line_geom_consistency(qm_keyp[:, 0:2], tm_keyp[:, 0:2],
                                                     dbim.shape[1], 1.0)
                print("Cons Line: ", cons)
                cons_idx = qidx[cons]
                if verbose:
                    print("   * Consistent number: ", cons_idx.shape)
                if cons_idx.shape[0] > 0:
                    # If the number of selected features exceeds the input
                    # limit, the per-image limit applies: ceil(0.25 * limit).
                    # The features kept are those with the best response.
                    if cons_idx.shape[0] > im_limit:
                        qkeyp_sel = qkeyp[cons_idx]               # selected keypoints
                        qkeyp_resp = qkeyp_sel[:, 4].reshape(-1)  # their responses
                        resp_order = qkeyp_resp.argsort()         # order by response
                        cons_idx = cons_idx[resp_order]
                        cons_idx = cons_idx[:im_limit]            # keep im_limit features
                    selected_idx.append(cons_idx)
                    total_sel += cons_idx.shape[0]
                    if verbose:
                        print("   `-> {0:d} features selected".format(cons_idx.shape[0]))
                else:
                    if verbose:
                        print("   `-> No features selected")

            if sel_crit == 'transform':
                qm_keyp = qkeyp[qidx]
                tm_keyp = dbkeypoints[tidx]
                _, cons = cv2.findFundamentalMat(qm_keyp[:, 0:2].astype(np.float32),
                                                 tm_keyp[:, 0:2].astype(np.float32),
                                                 method=cv2.FM_RANSAC, param1=3, param2=0.99)
                cons = cons.reshape(-1).astype('bool')
                cons_idx = qidx[cons]
                if verbose:
                    print("   * Consistent number: ", cons_idx.shape)
                if cons_idx.shape[0] > 0:
                    # Same per-image limit rule as in the 'line' branch.
                    if cons_idx.shape[0] > im_limit:
                        qkeyp_sel = qkeyp[cons_idx]
                        qkeyp_resp = qkeyp_sel[:, 4].reshape(-1)
                        resp_order = qkeyp_resp.argsort()
                        cons_idx = cons_idx[resp_order]
                        cons_idx = cons_idx[:im_limit]
                    selected_idx.append(cons_idx)
                    total_sel += cons_idx.shape[0]
                    if verbose:
                        print("   `-> {0:d} features selected".format(cons_idx.shape[0]))
                else:
                    if verbose:
                        print("   `-> No features selected")

            if limit != -1 and total_sel > limit:
                break

        if selected_idx != []:
            selected_idx = np.unique(np.hstack(selected_idx))
            if verbose:
                print(" --> Selected features: <{0:d}> from <{1:d}>".format(
                    selected_idx.shape[0], qfeat.shape[0]))

            # Record (name, count, cumulative top index) for the indexing file
            if len(selidx) == 0:
                selidx.append((dbimname, selected_idx.shape[0], selected_idx.shape[0]))
            else:
                prev_top = selidx[-1][2]
                selidx.append((dbimname, selected_idx.shape[0],
                               prev_top + selected_idx.shape[0]))

            selfeat.append(qfeat[selected_idx])
            selkeyp.append(qkeyp[selected_idx])
        else:
            print(" --> No features selected from image")

        # Optional: draw original vs. selected keypoints for inspection
        # outimdir = "{0:s}{1:s}".format(outdrawdir, os.path.splitext(dbimname)[0])
        # safe_create_dir(outimdir)
        # draw_keypoints_img(dbim, outimdir + "/original_keypoints_" + dbimname, qkeyp)
        # draw_keypoints_img(dbim, outimdir + "/{0:s}_selected_keypoints_{1:s}".format(sel_crit, dbimname), qkeyp[selected_idx])

        te_fs = time.time()
        if verbose:
            print(" - Done ({0:0.3f}s) - \n\n".format(te_fs - ts_fs))

    selected_db_features = np.vstack(selfeat)
    selected_db_keypoints = np.vstack(selkeyp)
    selected_feat_indexing = np.array(selidx, dtype=idxdt)

    if verbose:
        print("final feat shape: ", selected_db_features.shape)
        print("final keyp shape: ", selected_db_keypoints.shape)
        print("final indexing shape: ", selected_feat_indexing.shape)

    write_array_bin_file(outfeatfpath, selected_db_features)
    write_array_bin_file(outkeypfpath, selected_db_keypoints)
    np.savetxt(outidxfpath, selected_feat_indexing, fmt="%-50s %d %d")
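# Usage sketch (hypothetical paths): keep up to 250 line-consistent features
# per DB image, writing the selected features/keypoints and a new indexing
# file prefixed "db_sel" into "out_sel/":
#
#     feature_selection_transform("db_feat/", "db_index.flann", "db_img/",
#                                 "out_sel/", sel_crit='line', limit=250,
#                                 prefix="db_sel")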
def create_and_search_index(retcfg, jobs):

    batch_size = -1

    q_features = load_features(retcfg['path']['qfeature'])
    q_namelist = np.loadtxt(retcfg['path']['qlist'],
                            dtype=dict(names=('qname', 'nfeat'), formats=('U100', np.int32)))

    assert q_features.shape[0] == np.sum(q_namelist['nfeat']), \
        "Inconsistent sum of feature counts and size of query features array"

    norm = retcfg.get('feature', 'norm', fallback=None)

    db_features = load_features(retcfg['path']['dbfeature'])
    if norm:
        db_features = normalize(db_features, norm)
        q_features = normalize(q_features, norm)

    outdir = retcfg['path']['outdir'] + "queryfiles/"
    safe_create_dir(outdir)

    index_type = retcfg['index']['index_type']
    dist_type = retcfg['index']['dist_type']
    knn = retcfg.getint('search', 'knn')

    print(" -- Creating <{0:s}> NN index".format(index_type))
    print(" -> KNN: {0:d}".format(knn))
    print(" -> Metric: {0:s}\n".format(dist_type))

    nnidx = nmslib.init(method=index_type, space=dist_type)
    nnidx.addDataPointBatch(db_features)
    del db_features

    nnidx.createIndex({'post': 2, 'efConstruction': knn, 'M': knn}, print_progress=True)
    nnidx.setQueryTimeParams({'efSearch': knn})

    if batch_size == -1:
        batch_size = q_features.shape[0]
    n_batches = int(np.ceil(q_features.shape[0] / batch_size))

    # This variant saves one result per feature row, mapping rows to query
    # names one-to-one; it therefore assumes one feature vector per query.
    c = 0
    for i in tqdm(range(n_batches), ncols=100, desc='Batch', total=n_batches):
        s = i * batch_size
        e = s + batch_size
        batch_q_features = q_features[s:e]

        neighbours = nnidx.knnQueryBatch(batch_q_features, k=knn, num_threads=jobs)

        for j in tqdm(range(len(neighbours)), ncols=100, desc='Saving', total=len(neighbours)):
            indices, dists = neighbours[j]
            qname, _ = q_namelist[c]

            if dists.size != knn:
                raise ValueError("{0:d}:{1:d}:{2:s} -- expected {3:d} distances, "
                                 "got {4:d}".format(c, j, qname, knn, dists.size))

            matchfpath = "{0:s}{1:s}.matches".format(outdir, qname)
            distfpath = "{0:s}{1:s}.dist".format(outdir, qname)

            np.save(matchfpath + ".npy", indices)
            np.save(distfpath + ".npy", dists)
            c += 1

    print("---", flush=True)
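# nmslib HNSW note: 'M' bounds the graph's out-degree and 'efConstruction' the
# candidate-list size at build time; 'efSearch' plays the same role at query
# time. Reusing `knn` for all three (as above) ties index quality to the
# requested neighbourhood size; independently chosen values, e.g.
#
#     nnidx.createIndex({'post': 2, 'efConstruction': 100, 'M': 16},
#                       print_progress=True)
#
# are the more common configuration.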
def create_and_search_db_local(retcfg, jobs):

    norm = retcfg.get('feature', 'norm', fallback=None)

    print(" -- loading DB features from: {0:s}".format(retcfg['path']['dbfeature']))
    db_features = load_features(retcfg['path']['dbfeature'])
    db_namelist = np.loadtxt(retcfg['path']['dblist'],
                             dtype=dict(names=('qname', 'nfeat'), formats=('U100', np.int32)))

    ns = db_namelist.shape[0]
    idarray = np.arange(ns).astype(np.int32)
    invidx = invert_index(db_namelist)

    score = retcfg.get('rank', 'score_type', fallback='vote')

    if norm:
        db_features = normalize(db_features, norm)

    outdir = retcfg['path']['outdir']
    safe_create_dir(outdir)

    index_type = retcfg['index']['index_type']
    dist_type = retcfg['index']['dist_type']
    knn = retcfg.getint('search', 'knn_db', fallback=10)
    nmatches = retcfg.getint('rank', 'limit', fallback=100)

    print(" -- Creating <{0:s}> NN index".format(index_type))
    print(" -> KNN: {0:d}".format(knn))
    print(" -> Metric: {0:s}\n".format(dist_type))

    nnidx = nmslib.init(method=index_type, space=dist_type)
    nnidx.addDataPointBatch(db_features)
    nnidx.createIndex({'post': 2}, print_progress=True)
    nnidx.setQueryTimeParams({'efSearch': knn})

    # -1 marks rows not yet filled; the asserts below check for leftovers
    indices = np.zeros((db_namelist.shape[0], nmatches), dtype=np.int32) - 1
    scores = np.zeros((db_namelist.shape[0], nmatches), dtype=np.float64) - 1

    s = 0
    np.seterr(divide='ignore')
    # One query pass per DB image, using its own features as the query
    for i in tqdm(range(ns), ncols=100, desc='Sample #', total=ns):
        name, nf = db_namelist[i]
        e = s + nf
        batch_q_features = db_features[s:e]

        neighbours = nnidx.knnQueryBatch(batch_q_features, k=knn, num_threads=jobs)
        neighbours = list(zip(*neighbours))

        indices_ = np.array(neighbours[0]).reshape(-1).astype(np.int32)
        dists_ = np.array(neighbours[1]).reshape(-1)

        # Per-image vote counts and mean distances over all matched features
        matchscore = np.bincount(invidx[indices_], minlength=ns)
        distscores = np.bincount(invidx[indices_], weights=dists_, minlength=ns) / matchscore

        aux = np.logical_not(np.logical_or(np.isnan(distscores), np.isinf(distscores)))
        matchscore = matchscore[aux]
        distscores = distscores[aux]
        idarray_ = idarray[aux]

        if score == "vote":
            finalscore = matchscore
        elif score == "distance":
            finalscore = distscores
        elif score == "combine":
            # L2-normalize distances and convert to a similarity
            distscores_n = normalize(distscores.reshape(1, -1)).reshape(-1)
            distscores_n = np.max(distscores_n) - distscores_n
            finalscore = matchscore + distscores_n

        order = np.argsort(finalscore)
        if score == "vote" or score == "combine":
            order = order[::-1]

        idarray_ = idarray_[order]
        finalscore = finalscore[order]

        indices[i, :idarray_.size] = idarray_
        scores[i, :idarray_.size] = finalscore

        s = e

    assert np.argwhere(indices == -1).size == 0, \
        "Indices on positions {0:s} have not been updated correctly".format(str(np.argwhere(indices == -1)))
    assert np.argwhere(scores == -1).size == 0, \
        "Scores on positions {0:s} have not been updated correctly".format(str(np.argwhere(scores == -1)))

    outfile = "{0:s}{1:s}_db_matches.npy".format(outdir, retcfg.get('DEFAULT', 'expname'))
    np.save(outfile, indices)

    outfile = "{0:s}{1:s}_db_scores.npy".format(outdir, retcfg.get('DEFAULT', 'expname'))
    np.save(outfile, scores)
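# invert_index (project helper) is assumed to map each feature row of the DB
# matrix to the index of the image it belongs to, so that
# invidx[feature_idx] -> image_idx, which is what the bincount aggregation
# above relies on. A minimal sketch under that assumption:
#
#     def invert_index(namelist):
#         # repeat each image id once per feature it contributes
#         return np.repeat(np.arange(namelist.shape[0]), namelist['nfeat'])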
def create_and_search_db(retcfg, jobs):

    batch_size = -1
    norm = retcfg.get('feature', 'norm', fallback=None)

    print(" -- loading DB features from: {0:s}".format(retcfg['path']['dbfeature']))
    db_features = load_features(retcfg['path']['dbfeature'])

    # We are creating an array of topk for the DB objects themselves, so the
    # DB features double as the query features.
    q_features = db_features

    if norm:
        db_features = normalize(db_features, norm)
        q_features = normalize(q_features, norm)

    outdir = retcfg['path']['outdir']
    safe_create_dir(outdir)

    index_type = retcfg['index']['index_type']
    dist_type = retcfg['index']['dist_type']
    knn = retcfg.getint('search', 'knn_db', fallback=300)
    M = retcfg.getint('index', 'M', fallback=100)
    efC = retcfg.getint('index', 'efC', fallback=100)

    print(" -- Creating <{0:s}> NN index".format(index_type))
    print(" -> KNN: {0:d}".format(knn))
    print(" -> Metric: {0:s}\n".format(dist_type))

    nnidx = nmslib.init(method=index_type, space=dist_type)
    nnidx.addDataPointBatch(db_features)
    del db_features

    nnidx.createIndex({'post': 2, 'efConstruction': efC, 'M': M}, print_progress=True)
    nnidx.setQueryTimeParams({'efSearch': knn})

    # With batch_size == -1 a single batch covers all features, so the
    # output files below are written exactly once.
    if batch_size == -1:
        batch_size = q_features.shape[0]
    n_batches = int(np.ceil(q_features.shape[0] / batch_size))

    for i in tqdm(range(n_batches), ncols=100, desc='Batch #', total=n_batches):
        s = i * batch_size
        e = s + batch_size
        batch_q_features = q_features[s:e]

        neighbours = nnidx.knnQueryBatch(batch_q_features, k=knn, num_threads=jobs)
        neighbours = list(zip(*neighbours))

        check_consistency(neighbours[0], knn)
        check_consistency(neighbours[1], knn)

        outfile = "{0:s}{1:s}_db_matches.npy".format(outdir, retcfg.get('DEFAULT', 'expname'))
        np.save(outfile, np.array(neighbours[0]))

        outfile = "{0:s}{1:s}_db_scores.npy".format(outdir, retcfg.get('DEFAULT', 'expname'))
        np.save(outfile, np.array(neighbours[1]))
def create_and_search_index(retcfg, jobs):

    batch_size = -1

    q_features = load_features(retcfg['path']['qfeature'])
    q_namelist = np.loadtxt(retcfg['path']['qlist'],
                            dtype=dict(names=('qname', 'nfeat'), formats=('U100', np.int32)))

    assert q_features.shape[0] == np.sum(q_namelist['nfeat']), \
        "Inconsistent sum of feature counts and size of query features array"

    norm = retcfg.get('feature', 'norm', fallback=None)

    db_features = load_features(retcfg['path']['dbfeature'])
    if norm:
        db_features = normalize(db_features, norm)
        q_features = normalize(q_features, norm)

    outdir = retcfg['path']['outdir'] + "queryfiles/"
    safe_create_dir(outdir)

    index_type = retcfg['index']['index_type']
    dist_type = retcfg['index']['dist_type']
    knn = retcfg.getint('search', 'knn')
    M = retcfg.getint('index', 'M', fallback=20)
    efC = retcfg.getint('index', 'efC', fallback=20)

    print(" -- Creating <{0:s}> NN index".format(index_type))
    print(" -> KNN: {0:d}".format(knn))
    print(" -> Metric: {0:s}\n".format(dist_type))

    nnidx = nmslib.init(method=index_type, space=dist_type)
    nnidx.addDataPointBatch(db_features)
    del db_features

    # M and efC were read but unused in the original; they are assumed to be
    # intended as HNSW construction parameters, as in create_and_search_db.
    nnidx.createIndex({'post': 2, 'efConstruction': efC, 'M': M}, print_progress=True)
    nnidx.setQueryTimeParams({'efSearch': knn})

    if batch_size == -1:
        batch_size = q_features.shape[0]
    n_batches = int(np.ceil(q_features.shape[0] / batch_size))

    for i in tqdm(range(n_batches), ncols=100, desc='Batch', total=n_batches):
        s = i * batch_size
        e = s + batch_size
        batch_q_features = q_features[s:e]

        # k=knn (the original hard-coded k=10, ignoring the configured value)
        neighbours = nnidx.knnQueryBatch(batch_q_features, k=knn, num_threads=jobs)
        neighbours = list(zip(*neighbours))
        indices = np.array(neighbours[0])
        distances = np.array(neighbours[1])

        # With batch_size == -1 there is a single batch covering all query
        # features, so the per-query slicing below sees the full result set.
        s = 0
        for qname, n in q_namelist:
            qdists = distances[s:s + n]
            qidx = indices[s:s + n]

            matchfpath = "{0:s}{1:s}.matches".format(outdir, qname)
            distfpath = "{0:s}{1:s}.dist".format(outdir, qname)

            print(qname, "-> ", s, ":", s + n)
            print(" |_ dists: ", qdists.shape)
            print(" |_ indices: ", qidx.shape, end="\n---\n")

            np.save(matchfpath + ".npy", qidx)
            np.save(distfpath + ".npy", qdists)

            s += n

    print("---", flush=True)
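# Driver sketch (hypothetical config file name; see the config layout
# illustrated after create_index):
#
#     if __name__ == "__main__":
#         import configparser
#         retcfg = configparser.ConfigParser()
#         retcfg.read("retrieval.cfg")
#         create_and_search_index(retcfg, jobs=8)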