def getScoreForHashVal((path_to_db, hash_table, hash_val, class_idx, video_idx, class_idx_gt)): mani = Tube_Manipulator(path_to_db) mani.openSession() toSelect = (Tube.class_idx_pascal, Tube.video_id) criterion = (TubeHash.hash_table == hash_table, TubeHash.hash_val == hash_val) vals = mani.selectMix(toSelect=toSelect, criterion=criterion) vals = np.array(vals) if not hasattr(class_idx, '__iter__'): class_idx = [class_idx] total_count_total = vals.shape[0] video_count_total = sum( np.logical_and(vals[:, 0] == class_idx_gt, vals[:, 1] == video_idx)) scores = [] for class_idx_curr in class_idx: class_count = sum(vals[:, 0] == class_idx_curr) if class_idx_curr == class_idx_gt: video_count = video_count_total else: video_count = 0 class_count = class_count - video_count total_count = total_count_total - video_count_total score = class_count / float(total_count) scores.append(score) mani.closeSession() return scores
def script_verifyRecordedScoreMatchesDBScore(params): path_to_db = params.path_to_db path_to_hash = params.path_to_hash total_class_counts = params.total_class_counts img_path = params.img_path class_label = params.class_label video_id = params.video_id shot_id = params.shot_id class_idx = params.class_idx score_file = params.score_file mani = Tube_Manipulator(path_to_db) mani.openSession() toSelect = (Tube.idx, ) criterion = (Tube.class_idx_pascal == class_idx, Tube.video_id == video_id, Tube.shot_id == shot_id) total_shot_patches = mani.count(toSelect, criterion) #get patch id hash_info_patch = getHashInfoForImg(path_to_db, img_path) # patch_id=mani.select((Tube.idx,),(Tube.img_path==img_path,)); # assert len(patch_id)==1; # patch_id=patch_id[0][0]; # #get hash vals # mani_hash=TubeHash_Manipulator(path_to_db); # mani_hash.openSession(); # toSelect=(TubeHash.hash_table,TubeHash.hash_val) # criterion=(TubeHash.idx==patch_id,); # hash_info_patch=mani_hash.select(toSelect,criterion); #get hash_info of all patches in shot criterion = (Tube.class_idx_pascal == class_idx, Tube.video_id == video_id, Tube.shot_id == shot_id) hash_info_all = mani_hash.selectMix(toSelect, criterion) mani_hash.closeSession() mani.closeSession() hash_info_all = list(hash_info_all) hash_scores_patch = [] for idx_hash_info, hash_info_curr in enumerate(hash_info_patch): hash_file_curr = str(hash_info_curr[0]) + '_' + str( hash_info_curr[1]) + '_counts.p' hash_file_curr = os.path.join(path_to_hash, hash_file_curr) hash_bin_class_counts = pickle.load(open(hash_file_curr, 'rb')) hash_bin_class_count = hash_bin_class_counts[class_idx] numo = hash_bin_class_count - hash_info_all.count(hash_info_curr) deno = total_class_counts[class_idx] - total_shot_patches hash_scores_patch.append(numo / float(deno)) score_db = np.mean(hash_scores_patch) print len(hash_scores_patch), score_db, score_file assert np.isclose(score_db, score_file)
def script_saveHashAnalysisImages(params):
    """Build hash-bin analysis images and per-class cumulative-frequency
    graphs from the (class_idx, hash_val) pairs of one hash table.

    The raw (class, hash_val) pairs are cached in out_file_class_pre+'.npz'
    and only queried from the db on the first run.
    """
    path_to_db = params.path_to_db
    class_labels_map = params.class_labels_map
    percents = params.percents
    out_file_class_pre = params.out_file_class_pre
    out_file_hash_simple = params.out_file_hash_simple
    out_file_hash_byClass = params.out_file_hash_byClass
    hashtable = params.hashtable
    inc = params.inc
    dtype = params.dtype
    # cache miss: pull (class_idx, hash_val) for every patch in this table
    if not os.path.exists(out_file_class_pre + '.npz'):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        ids = mani.selectMix((Tube.class_idx_pascal, TubeHash.hash_val),
                             (TubeHash.hash_table == hashtable, ))
        mani.closeSession()
        ids = np.array(ids, dtype=dtype)
        np.savez(out_file_class_pre, ids)
    ids = np.load(out_file_class_pre + '.npz')['arr_0']
    # per-bin class counts, then sort the bins by discriminativeness
    counts_all, class_ids_breakdown = getClassIdsCount(ids[:, 0], ids[:, 1])
    ranks = getDiscriminativeScore(counts_all)
    sort_idx = np.argsort(ranks)
    counts_all = [counts_all[idx] for idx in sort_idx]
    class_ids_breakdown = [class_ids_breakdown[idx] for idx in sort_idx]
    # two renderings of the same data: plain and colored by class
    im_simple = getHashAnalysisIm(counts_all,
                                  class_ids_breakdown,
                                  inc=inc,
                                  colorByClass=False)
    im_byClass = getHashAnalysisIm(counts_all,
                                   class_ids_breakdown,
                                   inc=inc,
                                   colorByClass=True)
    visualize.saveMatAsImage(im_simple, out_file_hash_simple)
    visualize.saveMatAsImage(im_byClass, out_file_hash_byClass)
    # flatten the per-bin lists for per-class frequency analysis
    counts_all_ravel = np.array([c for counts in counts_all for c in counts])
    class_ids_breakdown_ravel = np.array(
        [c for class_ids in class_ids_breakdown for c in class_ids])
    class_id_pascal, class_idx_pascal = zip(*class_labels_map)
    for class_id_idx, class_id in enumerate(class_idx_pascal):
        frequency = counts_all_ravel[class_ids_breakdown_ravel == class_id]
        out_file = out_file_class_pre + '_' + class_id_pascal[
            class_id_idx] + '.png'
        title = class_id_pascal[class_id_idx] + ' ' + str(class_id)
        cum_freq, idx_perc = getCumulativeInfo(frequency, percents)
        # normalize cumulative frequency to [0, 1] before plotting
        savePerClassCumulativeGraph(cum_freq / float(cum_freq[-1]), idx_perc,
                                    percents, out_file, title)
def getScoreForIdx(table_idx,
                   path_to_db,
                   class_idx_pascal=None,
                   npz_path=None,
                   n_jobs=1,
                   total_counts=None):
    """Score every hash bin of one patch (Tube row table_idx).

    class_idx_pascal: class(es) to score against; defaults to the patch's
        own ground-truth class.
    npz_path: when given, scores are computed from cached npz counts via
        getScoreForHashValFromNpz instead of querying the db per bin.
    n_jobs: >1 fans the per-bin scoring out over a multiprocessing pool.

    Returns (scores, class_idx_gt, frame_path), one score entry per hash
    table the patch appears in.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    mani_hash = TubeHash_Manipulator(path_to_db)
    mani_hash.openSession()
    # look up the patch's ground-truth class, video and frame path
    toSelect = (Tube.class_idx_pascal, Tube.video_id, Tube.img_path)
    criterion = (Tube.idx == table_idx, )
    [(class_idx_gt, video_idx, frame_path)] = mani.select(toSelect, criterion)
    if class_idx_pascal is not None:
        class_idx = class_idx_pascal
    else:
        class_idx = class_idx_gt
    # all (hash_table, hash_val) pairs of the patch
    toSelect = (TubeHash.hash_table, TubeHash.hash_val)
    criterion = (TubeHash.idx == table_idx, )
    hash_table_info = mani_hash.select(toSelect, criterion)
    print len(hash_table_info)
    mani_hash.closeSession()
    mani.closeSession()
    # one argument tuple per hash table; shape depends on the scoring backend
    args = []
    for hash_table_no in range(len(hash_table_info)):
        hash_table = hash_table_info[hash_table_no][0]
        hash_val = hash_table_info[hash_table_no][1]
        if npz_path is not None:
            args.append((npz_path, hash_table, hash_val, class_idx, video_idx,
                         class_idx_gt, total_counts))
        else:
            args.append((path_to_db, hash_table, hash_val, class_idx,
                         video_idx, class_idx_gt))
    if n_jobs > 1:
        p = multiprocessing.Pool(min(multiprocessing.cpu_count(), n_jobs))
        if npz_path is not None:
            scores = p.map(getScoreForHashValFromNpz, args)
        else:
            scores = p.map(getScoreForHashVal, args)
    else:
        # serial fallback, same argument tuples
        scores = []
        for arg in args:
            if npz_path is not None:
                scores.append(getScoreForHashValFromNpz(arg))
            else:
                scores.append(getScoreForHashVal(arg))
    return scores, class_idx_gt, frame_path
def getInfoForFeatureExtractionForVideo(path_to_db, video_info, numberOfFrames):
    """Collect feature-extraction rows for every video in video_info.

    video_info maps a pascal class id to a list of video ids. For each
    video, at most numberOfFrames distinct
    (img_path, class_id, deep_features_path, deep_features_idx) rows are
    gathered. Returns one flat list of rows.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    rows = []
    for pascal_id in video_info:
        for vid in video_info[pascal_id]:
            rows.extend(mani.select(
                (Tube.img_path, Tube.class_id_pascal,
                 Tube.deep_features_path, Tube.deep_features_idx),
                (Tube.video_id == vid, Tube.class_id_pascal == pascal_id),
                distinct=True, limit=numberOfFrames))
    mani.closeSession()
    return rows
def getShotFrameCount(path_to_db, class_idx, video_id, shot_id):
    """Count Tube rows (frames) for one shot of one video of one class.

    path_to_db may be either a db path/URL string — a session is opened and
    closed here — or an already-open Tube_Manipulator, which is left open
    for the caller (as used by script_saveNpzScorePerShot).
    """
    # isinstance instead of `type(x) == str`: idiomatic, and also accepts
    # str subclasses
    owns_session = isinstance(path_to_db, str)
    if owns_session:
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
    else:
        mani = path_to_db
    toSelect = (Tube.idx, )
    criterion = (Tube.class_idx_pascal == class_idx,
                 Tube.video_id == video_id, Tube.shot_id == shot_id)
    frame_count = mani.count(toSelect, criterion)
    if owns_session:
        mani.closeSession()
    return frame_count
def getNVideosByPascalIds(path_to_db, pascal_ids, numberofVideos):
    """Return {pascal_id: list of up to numberofVideos distinct video ids}."""
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    dict_out = {}
    for pascal_id in pascal_ids:
        rows = mani.select((Tube.video_id, ),
                           (Tube.class_id_pascal == pascal_id, ),
                           distinct=True,
                           limit=numberofVideos)
        # select returns one-element tuples; unwrap to plain ids
        dict_out[pascal_id] = [row[0] for row in rows]
    mani.closeSession()
    return dict_out
def saveTotalClassBreakdowns(path_to_db, out_file):
    """Pickle per-class distinct video/shot/tube counts for the whole db."""
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    rows = mani.select(
        (Tube.class_idx_pascal, Tube.video_id, Tube.shot_id, Tube.tube_id),
        distinct=True)
    mani.closeSession()
    rows = np.array(rows)
    # column 0 is the class index; the rest identify video/shot/tube
    counts = getClassCountsByIdType(rows[:, 0], rows[:, 1:],
                                    ['video', 'shot', 'tube'])
    pickle.dump(counts, open(out_file, 'wb'))
def script_saveBigFeatureMats(params):
    """Stack per-file deep-feature matrices into large per-batch npz files.

    The (shuffled) list of feature-file paths is cached in out_file_paths;
    each batch is saved as out_file_featureMats_pre+'_<i>.npz' with a
    matching pickle of (paths, shape_record) metadata.
    """
    out_file_featureMats_pre = params.out_file_featureMats_pre
    out_file_meta_pre = params.out_file_meta_pre
    path_to_db = params.path_to_db
    out_file_paths = params.out_file_paths
    num_batches = params.num_batches
    # cache the full path list on first run
    if not os.path.exists(out_file_paths):
        mani = Tube_Manipulator(path_to_db)
        mani.openSession()
        paths_to_features = mani.select((Tube.deep_features_path, ),
                                        distinct=True)
        paths_to_features = [path_curr[0] for path_curr in paths_to_features]
        mani.closeSession()
        random.shuffle(paths_to_features)
        pickle.dump(paths_to_features, open(out_file_paths, 'wb'))
    paths_to_features = pickle.load(open(out_file_paths, 'rb'))
    paths_to_features.sort()
    # py2 integer division: floor batch size
    batch_size = len(paths_to_features) / num_batches
    idxRange = util.getIdxRange(len(paths_to_features), batch_size)
    print len(idxRange), idxRange[-1]
    for start_idx in range(len(idxRange) - 1):
        out_file_curr = out_file_featureMats_pre + '_' + str(
            start_idx) + '.npz'
        out_file_meta_curr = out_file_meta_pre + '_' + str(start_idx) + '.p'
        print start_idx, idxRange[start_idx], idxRange[
            start_idx + 1], out_file_curr, out_file_meta_curr,
        paths_to_features_curr = paths_to_features[
            idxRange[start_idx]:idxRange[start_idx + 1]]
        t = time.time()
        train, shape_record = getGiantFeaturesMatGPU(paths_to_features_curr)
        train = np.array(train)
        np.savez(out_file_curr, train)
        pickle.dump([paths_to_features_curr, shape_record],
                    open(out_file_meta_curr, 'wb'))
        print time.time() - t
        # NOTE(review): this break stops after the FIRST batch only — looks
        # like leftover debugging; confirm before removing.
        break
def main():
    # NOTE(review): the immediate return deliberately disables everything
    # below. This was a one-off backfill that inserted TubeHash rows from
    # precomputed per-file hash .npy files; kept for reference only.
    return
    path_to_db = 'sqlite://///disk2/novemberExperiments/experiments_youtube/patches_nn_hash.db'
    mani = Tube_Manipulator(path_to_db)
    mani_hash = TubeHash_Manipulator(path_to_db)
    mani.openSession()
    deep_features_path_all = mani.select((Tube.deep_features_path, ),
                                         distinct=True)
    deep_features_path_all = [x[0] for x in deep_features_path_all]
    print len(deep_features_path_all)
    mani_hash.openSession()
    # skips the first 11 files — presumably resuming a partial run; confirm
    for idx_deep_features_path, deep_features_path in enumerate(
            deep_features_path_all[11:]):
        t = time.time()
        # hash file sits next to the feature file: foo.npy -> foo_hash.npy
        hash_file = deep_features_path[:-4] + '_hash.npy'
        print hash_file
        idx_info = mani.select(
            (Tube.idx, Tube.deep_features_idx),
            (Tube.deep_features_path == deep_features_path, ))
        hash_vals = np.load(hash_file)
        for idx_foreign, row in idx_info:
            # one TubeHash row per hash table for this patch
            for hash_table, hash_val in enumerate(hash_vals[row]):
                mani_hash.insert(idx=idx_foreign,
                                 hash_table=hash_table,
                                 hash_val=int(hash_val),
                                 commit=False)
        # commit in batches of 10 feature files to amortize transaction cost
        if idx_deep_features_path % 10 == 0:
            mani_hash.session.commit()
    mani_hash.closeSession()
    mani.closeSession()
def getHashInfoForImg(path_to_db, img_path):
    """Return all (hash_table, hash_val) pairs stored for one patch image."""
    # resolve the patch's primary key from its image path
    tube_db = Tube_Manipulator(path_to_db)
    tube_db.openSession()
    rows = tube_db.select((Tube.idx, ), (Tube.img_path == img_path, ))
    assert len(rows) == 1
    patch_id = rows[0][0]
    tube_db.closeSession()
    # fetch every hash-table entry for that patch
    hash_db = TubeHash_Manipulator(path_to_db)
    hash_db.openSession()
    hash_info_patch = hash_db.select(
        (TubeHash.hash_table, TubeHash.hash_val),
        (TubeHash.idx == patch_id, ))
    hash_db.closeSession()
    return hash_info_patch
def getTotalCountsPerClass(path_to_db, class_idx_all):
    """Return {class_idx: number of distinct patches of that class}."""
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    total_counts = {}
    for cls in class_idx_all:
        total_counts[cls] = mani.count((Tube.idx, ),
                                       (Tube.class_idx_pascal == cls, ),
                                       distinct=True)
    mani.closeSession()
    return total_counts
def writeMetaInfoToDb(path_to_db,out_files,idx_global,class_ids_all,path_to_data):
    # Insert one Tube row per patch listed in each feature file's companion
    # .txt listing; returns the next free global index so callers can chain
    # batches. Patch paths are parsed as
    #   .../<class>_<video>_<shot>/<tube>/<frame>.<ext>
    mani=Tube_Manipulator(path_to_db);
    mani.openSession();
    for out_file_idx,out_file in enumerate(out_files):
        if out_file_idx%100==0:
            print out_file_idx,len(out_files)
        # the .txt next to each .npz lists the patch images in feature order
        in_file_text=out_file.replace('.npz','.txt');
        patch_files=util.readLinesFromFile(in_file_text);
        for idx_img_file,img_file in enumerate(patch_files):
            img_path=img_file;
            img_path_split=img_path.split('/');
            img_path_split=[segment for segment in img_path_split if segment!=''];
            # third-from-last segment encodes <class>_<video>_<shot>
            mat_name=img_path_split[-3];
            class_id_pascal=mat_name[:mat_name.index('_')];
            video_id=int(mat_name[mat_name.index('_')+1:mat_name.rindex('_')]);
            shot_id=int(mat_name[mat_name.rindex('_')+1:]);
            tube_id=int(img_path_split[-2]);
            frame_id=img_path_split[-1];
            frame_id=int(frame_id[:frame_id.index('.')]);
            class_idx_pascal=class_ids_all.index(class_id_pascal);
            deep_features_path=out_file;
            # row index of this patch inside the feature matrix
            deep_features_idx=idx_img_file;
            layer='fc7';
            # frame_id+1: frame files on disk appear to be 1-based — confirm
            frame_path=getFramePath(path_to_data,class_id_pascal,video_id,shot_id,frame_id+1)
            assert os.path.exists(frame_path);
            mani.insert(idx_global, img_path, frame_id, video_id, tube_id, shot_id,
                        frame_path=frame_path, layer=layer,
                        deep_features_path=deep_features_path,
                        deep_features_idx=deep_features_idx,
                        class_id_pascal=class_id_pascal,
                        class_idx_pascal=class_idx_pascal,commit=False);
            idx_global+=1;
    # single commit at the end keeps the whole batch in one transaction
    mani.session.commit();
    mani.closeSession();
    return idx_global;
def getTubePathsForShot(path_to_db, class_id_pascal, video_id, shot_id, frame_to_choose='middle'):
    """Return the distinct patch image paths of one frame of a shot.

    frame_to_choose=='middle' picks the median frame id of the shot;
    any other value picks frame id 0.
    """
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    shot_criterion = (Tube.class_id_pascal == class_id_pascal,
                      Tube.shot_id == shot_id,
                      Tube.video_id == video_id)
    frame_rows = mani.select((Tube.frame_id, ), shot_criterion, distinct=True)
    frame_ids = sorted(row[0] for row in frame_rows)
    if frame_to_choose == 'middle':
        # integer midpoint of the sorted frame ids
        frame_id = frame_ids[len(frame_ids) // 2]
    else:
        frame_id = 0
    path_rows = mani.select(
        (Tube.img_path, ),
        shot_criterion + (Tube.frame_id == frame_id, ),
        distinct=True)
    mani.closeSession()
    return [row[0] for row in path_rows]
def getHashBinClassBreakdowns((hash_table, hash_val, path_to_db, out_file, idx)): print idx mani = Tube_Manipulator(path_to_db) mani.openSession() toSelect = (Tube.class_idx_pascal, Tube.video_id, Tube.shot_id, Tube.tube_id) criterion = (TubeHash.hash_table == hash_table, TubeHash.hash_val == hash_val) vals = mani.selectMix(toSelect, criterion=criterion, distinct=True) mani.closeSession() vals = np.array(vals) class_idx_db = vals[:, 0] ids_db = vals[:, 1:] column_names = ['video', 'shot', 'tube'] counts = getClassCountsByIdType(class_idx_db, ids_db, column_names) # for k in counts.keys(): # for k2 in counts[k].keys(): # print k,k2,counts[k][k2]; # return counts pickle.dump(counts, open(out_file, 'wb'))
def verifyTotalClassBreakdowns(path_to_db, out_file):
    """Sanity-check a pickled breakdown (from saveTotalClassBreakdowns)
    against fresh distinct counts from the db, for class indices 0..9."""
    counts = pickle.load(open(out_file, 'rb'))
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    for class_idx in range(10):
        print class_idx
        # distinct videos / (video, shot) pairs / (video, shot, tube) triples
        toSelect = (Tube.video_id, )
        criterion = (Tube.class_idx_pascal == class_idx, )
        count_video = mani.count(toSelect, criterion, distinct=True)
        toSelect = (Tube.video_id, Tube.shot_id)
        count_shot = mani.count(toSelect, criterion, distinct=True)
        toSelect = (Tube.video_id, Tube.shot_id, Tube.tube_id)
        count_tube = mani.count(toSelect, criterion, distinct=True)
        # trailing commas: py2 print continuation, all pairs on one line
        print counts['video'][class_idx], count_video,
        print counts['shot'][class_idx], count_shot,
        print counts['tube'][class_idx], count_tube
        assert counts['video'][class_idx] == count_video
        assert counts['shot'][class_idx] == count_shot
        assert counts['tube'][class_idx] == count_tube
    mani.closeSession()
def script_saveNpzScorePerShot(params):
    """Compute per-frame hash scores for every tube of one shot and pickle
    {tube_id: score matrix} to out_file_scores.

    Each frame's score per hash bin is
        (bin count of class_idx_assume, minus this shot's own hits when
         scoring the true class) / (class total, minus this shot's frames).
    """
    path_to_db = params['path_to_db']
    total_class_counts = params['total_class_counts']
    class_idx = params['class_idx']
    video_id = params['video_id']
    shot_id = params['shot_id']
    out_file_scores = params['out_file_scores']
    path_to_hash = params['path_to_hash']
    num_hash_tables = params['num_hash_tables']
    # class to score against; defaults to the shot's own class
    class_idx_assume = params.get('class_idx_assume', None)
    if class_idx_assume is None:
        class_idx_assume = class_idx
    print params['idx']
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    toSelect = (Tube.deep_features_path, Tube.tube_id, Tube.deep_features_idx,
                TubeHash.hash_table, TubeHash.hash_val)
    criterion = (Tube.video_id == video_id,
                 Tube.class_idx_pascal == class_idx, Tube.shot_id == shot_id)
    vals = mani.selectMix(toSelect, criterion)
    # reuses the open session (getShotFrameCount accepts a manipulator)
    total_frames = getShotFrameCount(mani, class_idx, video_id, shot_id)
    mani.closeSession()
    # how often this shot hits each (hash_table, hash_val) bin
    hash_info = [(tuple_curr[3], tuple_curr[4]) for tuple_curr in vals]
    hash_counts = dict(Counter(hash_info))
    # when scoring the true class, exclude the shot's own frames from the
    # denominator (leave-shot-out)
    if class_idx_assume == class_idx:
        total_class_count = total_class_counts[class_idx_assume] - total_frames
    else:
        total_class_count = total_class_counts[class_idx_assume]
    hash_bin_scores = {}
    for idx, k in enumerate(hash_counts.keys()):
        # per-bin class counts live in <table>_<val>_counts.p files
        in_file = str(k[0]) + '_' + str(k[1]) + '_counts.p'
        class_id_counts = pickle.load(
            open(os.path.join(path_to_hash, in_file), 'rb'))
        if class_idx_assume == class_idx:
            # also exclude the shot's own hits from the numerator
            hash_bin_count = class_id_counts.get(class_idx_assume,
                                                 0) - hash_counts[k]
        else:
            hash_bin_count = class_id_counts.get(class_idx_assume, 0)
        hash_bin_scores[k] = hash_bin_count / float(total_class_count)
    vals_org = np.array(vals)
    deep_features_paths = vals_org[:, 0]
    # remaining columns: Tube.tube_id, Tube.deep_features_idx,
    # TubeHash.hash_table, TubeHash.hash_val
    vals = np.array(vals_org[:, 1:], dtype=int)
    tube_ids = np.unique(vals[:, 0])
    tube_scores_all = {}
    for tube_id in tube_ids:
        deep_features_idx = np.unique(vals[vals[:, 0] == tube_id, 1])
        tube_scores_all[tube_id] = getTubeScoresMat(tube_id, vals,
                                                    hash_bin_scores,
                                                    deep_features_idx,
                                                    num_hash_tables)
    pickle.dump(tube_scores_all, open(out_file_scores, 'wb'))
def getInfoForExtractionForTube(path_to_db, pascal_id, video_id, shot_id, tube_id):
    """Return distinct (img_path, class_id, deep_features_path,
    deep_features_idx) rows for every frame of a single tube."""
    mani = Tube_Manipulator(path_to_db)
    mani.openSession()
    columns = (Tube.img_path, Tube.class_id_pascal,
               Tube.deep_features_path, Tube.deep_features_idx)
    criterion = (Tube.video_id == video_id,
                 Tube.class_id_pascal == pascal_id,
                 Tube.tube_id == tube_id,
                 Tube.shot_id == shot_id)
    rows = mani.select(columns, criterion, distinct=True)
    mani.closeSession()
    return rows
def script_saveNpzScorePerShot_normalized(params): path_to_db = params['path_to_db'] file_binCounts = params['file_binCounts'] class_idx = params['class_idx'] video_id = params['video_id'] shot_id = params['shot_id'] out_file_scores = params['out_file_scores'] num_hash_tables = params['num_hash_tables'] total_counts = params['total_class_counts'] class_idx_assume = params.get('class_idx_assume', None) if class_idx_assume is None: class_idx_assume = class_idx print params['idx'] mani = Tube_Manipulator(path_to_db) mani.openSession() toSelect = (Tube.tube_id, Tube.deep_features_idx, TubeHash.hash_table, TubeHash.hash_val) criterion = (Tube.video_id == video_id, Tube.class_idx_pascal == class_idx, Tube.shot_id == shot_id) vals = mani.selectMix(toSelect, criterion) total_frames = getShotFrameCount(mani, class_idx, video_id, shot_id) mani.closeSession() hash_count_keys, hash_counts = pickle.load(open(file_binCounts, 'rb')) hash_info = [tuple(r) for r in vals[:, 2:]] hash_counts = dict(Counter(hash_info)) # total_counts=np.sum(hash_counts,axis=0); scores_all = {} vals = np.array(vals) tube_ids_uni = np.unique(vals[:, 0]) for tube_id in tube_ids_uni: vals_rel = vals[vals[:, 0] == tube_id, 1:] deep_features_idx_uni = np.unique(vals_rel[:, 0]) scores_tube = np.empty((len(deep_features_idx_uni), num_hash_tables)) scores_tube[:] = np.nan for deep_features_idx in deep_features_idx_uni: hash_info = vals_rel[vals_rel[:, 0] == deep_features_idx, 1:] assert len(hash_info) == num_hash_tables scores = [] for hash_info_curr in hash_info: idx_curr = hash_count_keys.index(tuple(hash_info_curr)) counts_curr = hash_counts[idx_curr, :] deno = counts_curr / total_counts.astype(dtype=float) numo = deno[class_idx_assume] deno = sum(deno) score_curr = numo / float(deno) scores.append(score_curr) scores_tube[deep_features_idx, :] = scores scores_all[tube_id] = scores_tube for tube_id in scores_all: tube_scores = scores_all[tube_id] assert np.sum(np.isnan(tube_scores)) == 0 
pickle.dump(scores_all, open(out_file_scores, 'wb'))