# Human-performance bounds on SumMe: score each user's annotation against the
# remaining users (leave-one-out F1), then report per-video min/max/mean.
import numpy as np

import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.SumEvaluation import metrics
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader

video_file_stem_names = dataset_pathvars.file_names
video_file_stem_names.sort()

t_acc_min = 0
t_acc_max = 0
t_acc_mean = 0
for video_idx, s_filename in enumerate(video_file_stem_names):
    _, user_labels, feature_sizes = SumMeMultiViewFeatureLoader.load_by_name(s_filename, doSoftmax=False)
    s_F1_scores = []
    for user_idx in range(user_labels.shape[1]):
        # Held-out user vs. all other users' annotations.
        selected_labels = user_labels[:, user_idx]
        user_scores_list = [user_labels[:, i]
                            for i in set(range(user_labels.shape[1])) - {user_idx}]
        s_F1_score = metrics.averaged_F1_score(y_trues=user_scores_list,
                                               y_score=selected_labels.tolist())
        s_F1_scores.append(s_F1_score)

    print("[{:02d} | {:02d}]\t{:s}: \tMin:{:.04f}\tMax:{:.04f}, Mean:{:.04f}".format(
        video_idx, len(video_file_stem_names), s_filename,
        min(s_F1_scores), max(s_F1_scores), np.mean(np.asarray(s_F1_scores))))
    t_acc_min += min(s_F1_scores)
    t_acc_max += max(s_F1_scores)
    t_acc_mean += np.mean(np.asarray(s_F1_scores))

print("Total MinAcc: {:.04f}\tMaxAcc: {:.04f}\tMean: {:.04f}".format(
    t_acc_min / len(video_file_stem_names),
    t_acc_max / len(video_file_stem_names),
    t_acc_mean / len(video_file_stem_names)))
# Unsupervised baseline: score frames by their summed contribution to the PCA
# components of the transposed feature matrix, then convert the scores to a
# 0/1 keyframe summary over predefined shot boundaries.
import numpy as np
import sklearn.preprocessing
from sklearn.decomposition import PCA

import datasets.getShotBoundariesECCV2016 as getSegs
import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader
from vsSummDevs.SumEvaluation import rep_conversions, metrics

videofile_stems = dataset_pathvars.file_names
videofile_stems.sort()

pdefined_segs = getSegs.getSumMeShotBoundaris()

F1_scores = 0
for video_idx, s_filename in enumerate(videofile_stems):
    video_features, user_labels, _ = SumMeMultiViewFeatureLoader.load_by_name(
        s_filename, doSoftmax=False)
    # Assign the result back: normalize() is not in-place.
    video_features = sklearn.preprocessing.normalize(video_features)

    pca = PCA(whiten=True, svd_solver='auto')
    pca.fit(video_features.transpose())
    frame_contrib = np.sum(pca.components_, axis=0)
    # Min-max rescale to [0, 1]; the original divided by (max - mean), which
    # looks like a typo for (max - min).
    frame_contrib = (frame_contrib - np.min(frame_contrib)) / (
        np.max(frame_contrib) - np.min(frame_contrib))

    s_seg = pdefined_segs[s_filename]
    s_frame01scores = rep_conversions.framescore2frame01score(frame_contrib, s_seg)
    # s_frame01scores = rep_conversions.framescore2frame01score_sort(frame_contrib)
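    # The fragment above ends before the evaluation step. A plausible
    # completion, mirroring the F1 accumulation used by the sibling scripts
    # in this section (assumed, not part of the original file):
    user_scores_list = [user_labels[:, i] for i in range(user_labels.shape[1])]
    s_F1_score = metrics.averaged_F1_score(y_trues=user_scores_list,
                                           y_score=s_frame01scores.tolist())
    F1_scores += s_F1_score

print("Average F1: {:.04f}".format(F1_scores / len(videofile_stems)))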
# Supervised sanity check: fit a linear SVR from frame features to the mean
# user score, then score frames with the regressor (train == test here, so
# this measures fitting capacity, not generalization).
import numpy as np
from sklearn import svm

import datasets.getShotBoundariesECCV2016 as getSegs
import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.SumEvaluation import rep_conversions, metrics
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader

FeatureDirectory = ['/home/zwei/datasets/SumMe/features/ImageNet/VGG',
                    '/home/zwei/datasets/SumMe/features/Kinetics/I3D',
                    '/home/zwei/datasets/SumMe/features/Places/ResNet50',
                    '/home/zwei/datasets/SumMe/features/Moments/ResNet50']

# s_feature_path = feature_paths[0]
pdefined_segs = getSegs.getSumMeShotBoundaris()
videofile_stems = dataset_pathvars.file_names

F1_scores = 0
for video_idx, s_filename in enumerate(videofile_stems):
    video_features, user_labels, _ = SumMeMultiViewFeatureLoader.load_by_name(s_filename)
    avg_labels = np.mean(user_labels, axis=1)

    clf = svm.LinearSVR()
    clf.fit(video_features, avg_labels)
    frame_contrib = clf.predict(video_features)
    # frame_contrib = (frame_contrib - np.min(frame_contrib)) / (np.max(frame_contrib) - np.mean(frame_contrib))
    # s_seg = pdefined_segs[s_filename]
    # s_frame01scores = rep_conversions.framescore2frame01score(frame_contrib, s_seg)
    # Uncommented so that s_frame01scores is defined before it is used below:
    s_frame01scores = rep_conversions.framescore2frame01score_sort(frame_contrib)

    user_scores_list = [user_labels[:, i] for i in range(user_labels.shape[1])]
    s_F1_score = metrics.averaged_F1_score(y_trues=user_scores_list,
                                           y_score=s_frame01scores.tolist())
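    # The original fragment stops here. A plausible continuation that
    # aggregates and reports the score, following the pattern of the other
    # scripts in this section (assumed, not original code):
    F1_scores += s_F1_score
    print("[{:02d} | {:02d}]\t{:s}: \t{:.04f}".format(
        video_idx, len(videofile_stems), s_filename, s_F1_score))

print("Average F1: {:.04f}".format(F1_scores / len(videofile_stems)))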
# Unsupervised baseline: rank frames by the entropy of their softmaxed
# multi-view features, then convert to a 0/1 summary over shot boundaries.
import numpy as np

import datasets.getShotBoundariesECCV2016 as getSegs
import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.SumEvaluation import rep_conversions, metrics
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader


def within_budget(intervals, nFrames, ratio):
    # Header reconstructed: the original fragment starts mid-function. It
    # checks that the selected intervals stay within ratio * nFrames.
    sum_len = 0
    for s_interval in intervals:
        sum_len += s_interval[1] - s_interval[0]
        if sum_len > ratio * nFrames:
            return False
    return True


sample_rate = 5
pdefined_segs = getSegs.getSumMeShotBoundaris()
video_file_stem_names = dataset_pathvars.file_names
video_file_stem_names.sort()

totalF1 = 0
for video_idx, s_filename in enumerate(video_file_stem_names):
    s_segments = pdefined_segs[s_filename].tolist()
    video_features, user_labels, feature_sizes = SumMeMultiViewFeatureLoader.load_by_name(
        s_filename, doSoftmax=True)
    # original_nFrames = video_features.shape[0]
    # video_features = video_features[::sample_rate, :]
    # avg_labels = np.mean(user_labels, axis=1)
    frame_entropy = SumMeMultiViewFeatureLoader.feature_entropy(video_features, feature_sizes)
    frame_entropy = (frame_entropy - np.min(frame_entropy)) / (
        np.max(frame_entropy) - np.min(frame_entropy))

    s_frame01scores = rep_conversions.framescore2frame01score(frame_entropy, s_segments)
    user_scores_list = [user_labels[:, i] for i in range(user_labels.shape[1])]
    s_F1_score = metrics.averaged_F1_score(y_trues=user_scores_list,
                                           y_score=s_frame01scores.tolist())
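    # Plausible completion of the truncated loop, following the other scripts
    # in this section (assumed, not part of the original fragment):
    totalF1 += s_F1_score
    print("[{:02d} | {:02d}]\t{:s}: \t{:.04f}".format(
        video_idx, len(video_file_stem_names), s_filename, s_F1_score))

print("Average F1: {:.04f}".format(totalF1 / len(video_file_stem_names)))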
# Per-feature-type evaluation against the KY-format ground truth (user
# summaries, change points, frames per segment). The top of this script is
# elided in the fragment: KyLoader, dataset, dataset_keys, video_frames,
# frame_rate and videofile_stems are defined there.
import numpy as np
import sklearn.preprocessing

from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader

# Dict head reconstructed from the identical feature_set used elsewhere in
# this section.
feature_set = {
    'ImageNet': [0, 1],
    'Kinetics': [1, 2],
    'Places': [2, 3],
    'Moments': [3, 4]
}

doSoftMax = False
L2NormFeature = False
# feature_type = 'Moments'
print("Do SoftMax: " + str(doSoftMax) + "\tL2Norm: " + str(L2NormFeature))
# print("Selected Type: {:s}".format(feature_type))

for video_idx, s_filename in enumerate(videofile_stems):
    # selected_feature_type = feature_set[feature_type]
    video_features, _, feature_sizes = SumMeMultiViewFeatureLoader.load_by_name(
        s_filename, doSoftmax=doSoftMax)
    feature_boundary = [0]
    feature_boundary.extend(np.cumsum(np.asarray(feature_sizes)).tolist())
    # video_features = video_features[:, feature_boundary[selected_feature_type[0]]:feature_boundary[selected_feature_type[1]]]
    if L2NormFeature:
        video_features = sklearn.preprocessing.normalize(video_features)

    n_frames = video_features.shape[0]
    key = KyLoader.searchKeyByFrame(n_frames, video_frames, dataset_keys)
    user_summary = dataset[key]['user_summary'][...]
    nfps = dataset[key]['n_frame_per_seg'][...].tolist()
    cps = dataset[key]['change_points'][...]
    positions = KyLoader.createPositions(n_frames, frame_rate)
    video_features = video_features[positions]
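    # The fragment is truncated here. A sketch of how such a script typically
    # finishes, assuming this repo's vsum_tools follows the common
    # generate_summary/evaluate_summary interface; the scorer, both function
    # names, and the budget are assumptions, not confirmed by the fragment:
    # probs = some_frame_scoring(video_features)  # hypothetical frame scorer
    # machine_summary = vsum_tools.generate_summary(probs, cps, n_frames, nfps, positions)
    # f1 = vsum_tools.evaluate_summary(machine_summary, user_summary, eval_metric='max')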
# Oracle segment values: build non-overlapping segments from the union of
# user labels and score each segment by its mean summed user score; the
# imported knapsack is meant to pick segments under a length budget.
import numpy as np

import vsSummDevs.obs_loaders.SumMeDataLoader
import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader
from vsSummDevs.SumEvaluation.knapsack import knapsack_dp
import vsSummDevs.SumEvaluation.vsum_tools as vsum_tools

videofile_stems = dataset_pathvars.file_names
videofile_stems.sort()

feature_set = {
    'ImageNet': [0, 1],
    'Kinetics': [1, 2],
    'Places': [2, 3],
    'Moments': [3, 4],
    'All': [0, 4]
}

sum_summary_score = 0
for s_video_stem in videofile_stems:
    _, s_labels, _ = SumMeMultiViewFeatureLoader.load_by_name(
        s_video_stem, doSoftmax=False, featureset=feature_set['ImageNet'])
    sum_scores = np.sum(s_labels, axis=1)
    s_segments = vsSummDevs.obs_loaders.SumMeDataLoader.convertlabels2NonoverlappedSegs(s_labels)

    updated_segments = []
    updated_values = []
    for s_segment in s_segments:
        s_len = s_segment[1] - s_segment[0]
        if s_len > 0:
            # Average per-frame score over the segment.
            s_value = np.sum(sum_scores[s_segment[0]:s_segment[1]]) * 1. / s_len
            if s_value > 0:
                updated_values.append(s_value)
                updated_segments.append(s_segment)
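    # The fragment ends before segment selection. A plausible continuation
    # with the imported knapsack_dp; the (values, weights, n_items, capacity)
    # signature and the 15% frame budget are assumptions, not confirmed by
    # the fragment:
    n_frames = s_labels.shape[0]
    seg_lens = [seg[1] - seg[0] for seg in updated_segments]
    capacity = int(0.15 * n_frames)
    picks = knapsack_dp(updated_values, seg_lens, len(updated_segments), capacity)
    sum_summary_score += sum(updated_values[i] for i in picks)

print("Average summary score: {:.04f}".format(sum_summary_score / len(videofile_stems)))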
# Visualize frame position encodings with t-SNE, colored by mean user score.
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import dir_utils
import vsSummDevs.datasets.SumMe.path_vars as dataset_pathvars
from vsSummDevs.datasets.SumMe import SumMeMultiViewFeatureLoader


def subscatter(ax, projections, labels, sample_rate):
    # Signature reconstructed from the call site below; the body is elided in
    # the original fragment except for the final axis call. The scatter call
    # is an assumed stand-in for the missing plotting code.
    ax.scatter(projections[:, 0], projections[:, 1], c=labels)
    ax.axis('equal')
    return


sample_rate = 5
video_file_stem_names = dataset_pathvars.file_names
video_file_stem_names.sort()

totalF1 = 0
save_dir = dir_utils.get_dir('t-SNEVisualization')
for video_idx, s_filename in enumerate(video_file_stem_names):
    print('[{:d} | {:d}], {:s}'.format(video_idx, len(video_file_stem_names), s_filename))
    video_features, user_labels, feature_sizes = SumMeMultiViewFeatureLoader.load_by_name(
        s_filename, doSoftmax=False)
    avg_labels = np.mean(user_labels, axis=1)

    nframes = video_features.shape[0]
    feature_dim = video_features.shape[1]
    position_features = SumMeMultiViewFeatureLoader.PositionEncoddings(nframes, feature_dim)
    position_features = position_features[::sample_rate, :]
    avg_labels = avg_labels[::sample_rate]

    digits_proj = TSNE(random_state=0).fit_transform(position_features)
    ax = plt.subplot(1, 1, 1)
    subscatter(ax, digits_proj, avg_labels, sample_rate)
    plt.title(s_filename)
    # save_name = os.path.join(save_dir,   <- truncated in the original fragment
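    # Assumed completion of the truncated save step; the filename pattern and
    # the savefig/close calls are hypothetical:
    save_name = os.path.join(save_dir, '{:s}.png'.format(s_filename))
    plt.savefig(save_name)
    plt.close()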
# Dataset __init__ for clip sampling. The class header, module imports, and
# the train/val stem lists are elided in this fragment.
def __init__(self, split='train', doSoftMax=False, L2Norm=False,
             clip_size=100, sample_rate=15, feature_type='ImageNet'):
    print("Softmax:\t{0}\tL2Norm\t{1}".format(doSoftMax, L2Norm))
    self.split = split
    if self.split == 'train':
        self.video_stems = train_video_stems
    elif self.split == 'val':
        self.video_stems = val_video_stems
    else:
        print("Unrecognized data split: {:s}".format(split))
        sys.exit(-1)

    self.clip_size = clip_size
    self.videofeatures = {}
    self.annotations = {}
    for s_video_stem in self.video_stems:
        s_video_features, s_labels, _ = SumMeMultiViewFeatureLoader.load_by_name(
            s_video_stem, doSoftmax=doSoftMax, featureset=feature_set[feature_type])
        s_video_features = s_video_features[::sample_rate]
        s_labels = s_labels[::sample_rate]
        if self.split == 'val':
            s_labels = s_labels[:, :15]  # for validation, keep the first 15 user annotations

        n_frames = s_video_features.shape[0]
        if s_video_features.shape[0] < clip_size:
            print("{:s} doesn't have enough frames, skipping".format(s_video_stem))
            continue
        if L2Norm:
            s_video_features = sklearn.preprocessing.normalize(s_video_features)
        self.videofeatures[s_video_stem] = s_video_features

        # Convert per-user labels to segments and wrap each as an annotation.
        s_segs = convertlabels2segs(s_labels)
        s_annotations = []
        for s_seg in s_segs:
            s_annotation = Segment(s_video_stem)
            s_annotation.initId(startId=s_seg[0], endId=s_seg[1], length=n_frames)
            s_annotations.append(s_annotation)
        self.annotations[s_video_stem] = s_annotations

    self.r_overlap = 0.3
    self.n_videos = len(self.annotations)
    if self.split == 'train':
        # Training samples clips on the fly, so the epoch size is fixed.
        self.dataset_size = 10000
    else:
        # Validation iterates over the flattened list of annotations.
        list_annotations = []
        for s_video_stem in self.annotations.keys():
            list_annotations.extend(self.annotations[s_video_stem])
        self.annotations = list_annotations
        self.dataset_size = len(self.annotations)
    self.counter = 0
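# A minimal usage sketch, assuming this __init__ belongs to a Dataset-style
# class; the class name SumMeClipDataset is hypothetical:
#
#     train_set = SumMeClipDataset(split='train', feature_type='ImageNet',
#                                  clip_size=100, sample_rate=15)
#     val_set = SumMeClipDataset(split='val')
#     print(train_set.dataset_size, val_set.dataset_size)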