def to_persubject_dataset(self, quality_scores, **kwargs):

    newone = empty_object()
    newone.dataset_name = self.dataset.dataset_name
    newone.yuv_fmt = self.dataset.yuv_fmt
    newone.width = self.dataset.width
    newone.height = self.dataset.height

    if 'quality_width' in kwargs and kwargs['quality_width'] is not None:
        newone.quality_width = kwargs['quality_width']
    elif hasattr(self.dataset, 'quality_width'):
        newone.quality_width = self.dataset.quality_width

    if 'quality_height' in kwargs and kwargs['quality_height'] is not None:
        newone.quality_height = kwargs['quality_height']
    elif hasattr(self.dataset, 'quality_height'):
        newone.quality_height = self.dataset.quality_height

    if 'resampling_type' in kwargs and kwargs['resampling_type'] is not None:
        newone.resampling_type = kwargs['resampling_type']
    elif hasattr(self.dataset, 'resampling_type'):
        newone.resampling_type = self.dataset.resampling_type

    # ref_videos: deepcopy
    newone.ref_videos = copy.deepcopy(self.dataset.ref_videos)

    # dis_videos: expand each dis_video into one entry per input per-subject score
    dis_videos = []
    for dis_video, quality_score in zip(self.dataset.dis_videos, quality_scores):
        assert 'os' in dis_video

        # quality_score should be a 1-D array with (processed) per-subject scores
        assert hasattr(quality_score, '__len__')
        assert len(dis_video['os']) == len(quality_score)

        for persubject_score in quality_score:
            dis_video2 = copy.deepcopy(dis_video)
            if 'os' in dis_video2:  # remove 'os' - opinion score
                del dis_video2['os']
            dis_video2['groundtruth'] = persubject_score
            dis_videos.append(dis_video2)

    newone.dis_videos = dis_videos

    return newone
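
# A minimal, hypothetical sketch of the expansion that to_persubject_dataset()
# applies to dis_videos: each entry with a list of opinion scores under 'os'
# becomes one entry per subject, with 'os' dropped and 'groundtruth' set to
# that subject's (possibly processed) score. The dict fields, values, and
# function name below are made up for illustration.
def _example_persubject_expansion():
    import copy
    dis_videos = [
        {'content_id': 0, 'asset_id': 0, 'path': 'dis0.yuv', 'os': [3, 4, 5]},
        {'content_id': 0, 'asset_id': 1, 'path': 'dis1.yuv', 'os': [2, 2, 3]},
    ]
    quality_scores = [[3.1, 4.0, 4.9], [1.8, 2.2, 3.0]]  # e.g. bias-removed per-subject scores
    expanded = []
    for dis_video, scores in zip(dis_videos, quality_scores):
        assert len(dis_video['os']) == len(scores)
        for s in scores:
            entry = copy.deepcopy(dis_video)
            del entry['os']
            entry['groundtruth'] = s
            expanded.append(entry)
    assert len(expanded) == 6  # num_videos * num_subjects
    return expanded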
def to_dataset(self):
    """ Override DatasetReader.to_dataset(). Need to overwrite dis_video['os'].
    """
    newone = empty_object()
    newone.__dict__.update(self.dataset.__dict__)

    # deep copy ref_videos and dis_videos
    newone.ref_videos = copy.deepcopy(self.dataset.ref_videos)
    newone.dis_videos = copy.deepcopy(self.dataset.dis_videos)

    # overwrite dis_video['os']
    score_mtx = self.opinion_score_2darray
    num_videos, num_subjects = score_mtx.shape
    assert num_videos == len(newone.dis_videos)
    for scores, dis_video in zip(score_mtx, newone.dis_videos):
        dis_video['os'] = list(scores)

    return newone
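
# A minimal, hypothetical sketch of the overwrite step in to_dataset(): rows of
# a (num_videos x num_subjects) opinion-score matrix are written back into
# dis_video['os']. The matrix values, dict fields, and function name are made
# up for illustration.
def _example_overwrite_os():
    import numpy as np
    score_mtx = np.array([[3.0, 4.0, 5.0],
                          [2.0, 2.0, 3.0]])  # rows: videos, columns: subjects
    dis_videos = [{'asset_id': 0}, {'asset_id': 1}]
    num_videos, num_subjects = score_mtx.shape
    assert num_videos == len(dis_videos)
    for scores, dis_video in zip(score_mtx, dis_videos):
        dis_video['os'] = list(scores)
    return dis_videos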
def to_aggregated_dataset(self, aggregate_scores, **kwargs):

    newone = empty_object()
    newone.dataset_name = self.dataset.dataset_name
    newone.yuv_fmt = self.dataset.yuv_fmt
    newone.width = self.dataset.width
    newone.height = self.dataset.height

    if 'quality_width' in kwargs and kwargs['quality_width'] is not None:
        newone.quality_width = kwargs['quality_width']
    elif hasattr(self.dataset, 'quality_width'):
        newone.quality_width = self.dataset.quality_width

    if 'quality_height' in kwargs and kwargs['quality_height'] is not None:
        newone.quality_height = kwargs['quality_height']
    elif hasattr(self.dataset, 'quality_height'):
        newone.quality_height = self.dataset.quality_height

    if 'resampling_type' in kwargs and kwargs['resampling_type'] is not None:
        newone.resampling_type = kwargs['resampling_type']
    elif hasattr(self.dataset, 'resampling_type'):
        newone.resampling_type = self.dataset.resampling_type

    # ref_videos: deepcopy
    newone.ref_videos = copy.deepcopy(self.dataset.ref_videos)

    # dis_videos: use input aggregate scores
    dis_videos = []
    assert len(self.dataset.dis_videos) == len(aggregate_scores)
    for dis_video, score in zip(self.dataset.dis_videos, aggregate_scores):
        dis_video2 = copy.deepcopy(dis_video)
        if 'os' in dis_video2:  # remove 'os' - opinion score
            del dis_video2['os']
        dis_video2['groundtruth'] = score
        dis_videos.append(dis_video2)
    newone.dis_videos = dis_videos

    return newone
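
# A minimal, hypothetical sketch of to_aggregated_dataset()'s replacement of
# per-subject scores with one aggregate score per dis_video: here the aggregate
# is a plain mean opinion score, and 'os' is swapped for 'groundtruth'. The
# dict fields, values, and function name are made up for illustration.
def _example_aggregated_groundtruth():
    import copy
    dis_videos = [
        {'asset_id': 0, 'os': [3, 4, 5]},
        {'asset_id': 1, 'os': [2, 2, 3]},
    ]
    aggregate_scores = [float(sum(d['os'])) / len(d['os']) for d in dis_videos]
    assert len(dis_videos) == len(aggregate_scores)
    aggregated = []
    for dis_video, score in zip(dis_videos, aggregate_scores):
        entry = copy.deepcopy(dis_video)
        del entry['os']
        entry['groundtruth'] = score
        aggregated.append(entry)
    return aggregated  # e.g. [{'asset_id': 0, 'groundtruth': 4.0}, ...]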
def _metrics_performance(objScoDif, signif):
    """
    mirroring matlab function:
    %[results] = Metrics_performance(objScoDif, signif, doPlot)
    % INPUT:    objScoDif   : differences of objective scores [M,N]
    %                         M : number of metrics
    %                         N : number of pairs
    %           signif      : statistical outcome of paired comparison [1,N]
    %                          0 : no difference
    %                         -1 : first stimulus is worse
    %                          1 : first stimulus is better
    %           doPlot      : boolean indicating if graphs should be plotted
    %
    % OUTPUT:   results - structure with following fields
    %
    %           AUC_DS      : Area Under the Curve for Different/Similar ROC
    %                         analysis
    %           pDS_DL      : p-values for AUC_DS from DeLong test
    %           pDS_HM      : p-values for AUC_DS from Hanley and McNeil test
    %           AUC_BW      : Area Under the Curve for Better/Worse ROC
    %                         analysis
    %           pBW_DL      : p-values for AUC_BW from DeLong test
    %           pBW_HM      : p-values for AUC_BW from Hanley and McNeil test
    %           CC_0        : Correct Classification @ DeltaOM = 0 for
    %                         Better/Worse ROC analysis
    %           pCC0_b      : p-values for CC_0 from binomial test
    %           pCC0_F      : p-values for CC_0 from Fisher's exact test
    %           THR         : threshold for 95% probability that the stimuli
    %                         are different
    """

    # M = size(objScoDif,1);
    # D = abs(objScoDif(:,signif ~= 0));
    # S = abs(objScoDif(:,signif == 0));
    # samples.spsizes = [size(D,2),size(S,2)];
    # samples.ratings = [D,S];

    M = objScoDif.shape[0]
    D = np.abs(objScoDif[:, indices(signif[0], lambda x: x != 0)])
    S = np.abs(objScoDif[:, indices(signif[0], lambda x: x == 0)])
    samples = empty_object()
    samples.spsizes = [D.shape[1], S.shape[1]]
    samples.ratings = np.hstack([D, S])

    # % calculate AUCs

    # [AUC_DS,C] = fastDeLong(samples);
    AUC_DS, C, _, _ = fastDeLong(samples)

    # % significance calculation

    # pDS_DL = ones(M);
    # for i=1:M-1
    #     for j=i+1:M
    #         pDS_DL(i,j) = calpvalue(AUC_DS([i,j]), C([i,j],[i,j]));
    #         pDS_DL(j,i) = pDS_DL(i,j);
    #     end
    # end
    pDS_DL = np.ones([M, M])
    for i in range(1, M):
        for j in range(i + 1, M + 1):
            # http://stackoverflow.com/questions/4257394/slicing-of-a-numpy-2d-array-or-how-do-i-extract-an-mxm-submatrix-from-an-nxn-ar
            pDS_DL[i - 1, j - 1] = calpvalue(AUC_DS[[i - 1, j - 1]],
                                             C[[[i - 1], [j - 1]], [i - 1, j - 1]])
            pDS_DL[j - 1, i - 1] = pDS_DL[i - 1, j - 1]

    # [pDS_HM,CI_DS] = significanceHM(S, D, AUC_DS);
    pDS_HM, CI_DS = significanceHM(S, D, AUC_DS)

    # THR = prctile(D',95);
    THR = np.percentile(D, 95, axis=1)

    # %%%%%%%%%%%%%%%%%%%%%%% Better / Worse %%%%%%%%%%%%%%%%%%%%%%%%%%%

    # B = [objScoDif(:,signif == 1),-objScoDif(:,signif == -1)];
    # W = -B;
    # samples.ratings = [B,W];
    # samples.spsizes = [size(B,2),size(W,2)];

    B1 = objScoDif[:, indices(signif[0], lambda x: x == 1)]
    B2 = objScoDif[:, indices(signif[0], lambda x: x == -1)]
    B = np.hstack([B1, -B2])
    W = -B
    samples = empty_object()
    samples.ratings = np.hstack([B, W])
    samples.spsizes = [B.shape[1], W.shape[1]]

    # % calculate AUCs

    # [AUC_BW,C] = fastDeLong(samples);
    AUC_BW, C, _, _ = fastDeLong(samples)

    # % calculate correct classification for DeltaOM = 0

    # L = size(B,2) + size(W,2);
    # CC_0 = zeros(M,1);
    # for m=1:M
    #     CC_0(m) = (sum(B(m,:)>0) + sum(W(m,:)<0)) / L;
    # end
    L = B.shape[1] + W.shape[1]
    CC_0 = np.zeros(M)
    for m in range(M):
        CC_0[m] = float(np.sum(B[m, :] > 0) + np.sum(W[m, :] < 0)) / L

    # % significance calculation

    # pBW_DL = ones(M);
    # pCC0_b = ones(M);
    # pCC0_F = ones(M);
    # for i=1:M-1
    #     for j=i+1:M
    #         pBW_DL(i,j) = calpvalue(AUC_BW([i,j]), C([i,j],[i,j]));
    #         pBW_DL(j,i) = pBW_DL(i,j);
    #
    #         pCC0_b(i,j) = significanceBinomial(CC_0(i), CC_0(j), L);
    #         pCC0_b(j,i) = pCC0_b(i,j);
    #
    #         pCC0_F(i,j) = fexact(CC_0(i)*L, 2*L, CC_0(i)*L + CC_0(j)*L, L, 'tail', 'b')/2;
    #         pCC0_F(j,i) = pCC0_F(i,j);
    #     end
    # end

    pBW_DL = np.ones([M, M])
    pCC0_b = np.ones([M, M])
    # pCC0_F = np.ones([M, M])
    for i in range(1, M):
        for j in range(i + 1, M + 1):
            pBW_DL[i - 1, j - 1] = calpvalue(AUC_BW[[i - 1, j - 1]],
                                             C[[[i - 1], [j - 1]], [i - 1, j - 1]])
            pBW_DL[j - 1, i - 1] = pBW_DL[i - 1, j - 1]

            pCC0_b[i - 1, j - 1] = significanceBinomial(CC_0[i - 1], CC_0[j - 1], L)
            pCC0_b[j - 1, i - 1] = pCC0_b[i - 1, j - 1]

            # pCC0_F[i - 1, j - 1] = fexact(CC_0[i - 1] * L, 2 * L, CC_0[i - 1] * L + CC_0[j - 1] * L, L, 'tail', 'b') / 2.0
            # pCC0_F[j - 1, i - 1] = pCC0_F[i - 1, j - 1]

    # [pBW_HM,CI_BW] = significanceHM(B, W, AUC_BW);
    pBW_HM, CI_BW = significanceHM(B, W, AUC_BW)

    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # % Adding outputs to the structure

    # results.AUC_DS = AUC_DS;
    # results.pDS_DL = pDS_DL;
    # results.pDS_HM = pDS_HM;
    # results.AUC_BW = AUC_BW;
    # results.pBW_DL = pBW_DL;
    # results.pBW_HM = pBW_HM;
    # results.CC_0 = CC_0;
    # results.pCC0_b = pCC0_b;
    # results.pCC0_F = pCC0_F;
    # results.THR = THR;

    result = {
        'AUC_DS': AUC_DS,
        'pDS_DL': pDS_DL,
        'pDS_HM': pDS_HM,
        'AUC_BW': AUC_BW,
        'pBW_DL': pBW_DL,
        'pBW_HM': pBW_HM,
        'CC_0': CC_0,
        'pCC0_b': pCC0_b,
        # 'pCC0_F': pCC0_F,
        'THR': THR,
    }

    # %%%%%%%%%%%%%%%%%%%%%%%% Plot Results %%%%%%%%%%%%%%%%%%%%%%%%%%%
    #
    # if(doPlot == 1)
    #
    #     % Using Benjamini-Hochberg procedure for multiple comparisons in plots
    #     % (note: correlation between groups has to be positive)
    #
    #     plot_auc(results.pDS_HM,results.AUC_DS, CI_DS, 'AUC (-)','Different/Similar')
    #     plot_cc(results.pCC0_F,results.CC_0,'C_0 (%)','Better/Worse')
    #     plot_auc(results.pBW_HM,results.AUC_BW, CI_BW, 'AUC (-)','Better/Worse')
    #
    # end

    return result
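
# A minimal, hypothetical sketch of the inputs _metrics_performance() expects
# and of the Different/Similar and Better/Worse splits it derives from them,
# using plain numpy boolean indexing in place of the indices() helper;
# fastDeLong/calpvalue/significanceHM are not exercised here. The values and
# the function name are made up for illustration.
def _example_metrics_performance_inputs():
    import numpy as np
    # objScoDif: M metrics x N stimulus pairs of objective-score differences
    objScoDif = np.array([[0.5, -1.2, 0.1, 2.0],
                          [0.3, -0.9, -0.2, 1.5]])
    # signif: 1 x N outcome of the subjective paired comparison (0, -1, or 1)
    signif = np.array([[1, -1, 0, 1]])
    # Different/Similar: pairs with a significant subjective difference vs. without
    D = np.abs(objScoDif[:, signif[0] != 0])   # shape (2, 3)
    S = np.abs(objScoDif[:, signif[0] == 0])   # shape (2, 1)
    # Better/Worse: orient the differences so that "better" is positive
    B = np.hstack([objScoDif[:, signif[0] == 1], -objScoDif[:, signif[0] == -1]])
    W = -B
    return D, S, B, W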