from nltk.metrics.agreement import AnnotationTask
from nltk.metrics.distance import masi_distance


def calculate_iaa(data_dict):
    i = 0
    data = []
    for key, value in data_dict.items():
        i += 1
        # Each annotator's two labels for an item are combined into a frozenset,
        # the hashable set type that masi_distance expects.
        data.append(
            ('Annotator1', i, frozenset((value['label1'], value['label1_2']))))
        data.append(
            ('Annotator2', i, frozenset((value['label2'], value['label2_2']))))
    print(data)
    t = AnnotationTask(data=data, distance=masi_distance)
    print(t.avg_Ao())
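# A minimal usage sketch; the input shape is assumed from the key accesses
# above (each item carries two labels per annotator; the sample labels are
# illustrative):
sample = {
    'sent1': {'label1': 'POS', 'label1_2': 'IRONY',
              'label2': 'POS', 'label2_2': 'IRONY'},
    'sent2': {'label1': 'NEG', 'label1_2': 'NONE',
              'label2': 'POS', 'label2_2': 'NONE'},
}
calculate_iaa(sample)  # prints the triples, then the average observed agreement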
from nltk.metrics.agreement import AnnotationTask


def agree_tags(delta, column):
    """
    Computes agreement for single-token labelling tasks.
    :param delta: the compared annotation data
    :param column: the column for which agreement is computed
    :return: the agreement coefficients listed below
    """
    by_field = reverse_tags(delta, column)
    task = AnnotationTask(data=by_field)
    oa = task.avg_Ao()               # observed agreement
    s = task.S()                     # Bennett, Albert and Goldstein S (1954): all categories equally likely
    pi = task.pi()                   # Scott's pi (1955): single label distribution
    kappa = task.kappa()             # Cohen's kappa (1960): individual coder distributions
    w_kappa = task.weighted_kappa()  # Cohen's weighted kappa (1968)
    alpha = task.alpha()             # Krippendorff's alpha (1980)
    return oa, s, pi, kappa, w_kappa, alpha
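# reverse_tags is project-specific and not shown here; a sketch of the
# (coder, item, label) triples it is assumed to yield, which is the input
# format AnnotationTask expects:
from nltk.metrics.agreement import AnnotationTask

by_field = [
    ('annotator_a', 'tok-0', 'NOUN'),
    ('annotator_b', 'tok-0', 'NOUN'),
    ('annotator_a', 'tok-1', 'VERB'),
    ('annotator_b', 'tok-1', 'ADJ'),
]
print(AnnotationTask(data=by_field).avg_Ao())  # 0.5: agreement on 1 of 2 tokens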
from nltk.metrics.agreement import AnnotationTask


def compute_annotator_agreement_nltkmetrics(data_array):
    '''See http://nltk.org/api/nltk.metrics.html#nltk.metrics.agreement'''
    print("####### Agreement coefficients according to NLTK metrics.agreement #######")
    t = AnnotationTask(data=data_array)
    print("Average observed agreement across all coders and items: " + str(t.avg_Ao()))
    print("Cohen's kappa (Cohen 1960): " + str(t.kappa()))
    print("Weighted kappa (Cohen 1968): " + str(t.weighted_kappa()))
    print("Scott's pi (Scott 1955): " + str(t.pi()))
    # print("pi_avg: " + str(t.pi_avg()))
    print("alpha (Krippendorff 1980): " + str(t.alpha()))
    print("Observed disagreement for the alpha coefficient: " + str(t.Do_alpha()))
    print("S (Bennett, Albert and Goldstein 1954): " + str(t.S()))
    # print("n-notation used in Artstein and Poesio (2007): " + str(t.N(k=, ic???)))
    print("Observed disagreement for the weighted kappa coefficient averaged over all labelers: " + str(t.Do_Kw()))
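# Usage sketch with hand-made triples (assumed data, not from the original
# source); data_array is the usual list of (coder, item, label) tuples:
data_array = [
    ('c1', 'item1', 'A'), ('c2', 'item1', 'A'),
    ('c1', 'item2', 'B'), ('c2', 'item2', 'A'),
]
compute_annotator_agreement_nltkmetrics(data_array)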
import os

import yaml
import nltk.metrics.distance
from joblib import Memory
from nltk.metrics.agreement import AnnotationTask

# json2taskdata and the Agree namedtuple are project-local (defined elsewhere).


def getagreement(tpl, datadir, task_type='all'):
    """Get agreement values for the annotators in the :data:`tpl` list.

    Args:
        tpl (list): combination group of annotators
        datadir (str): cache data directory used by joblib

    Returns:
        namedtuple defined as
        ``Agree = collections.namedtuple('Agree', ['kappa', 'alpha', 'avg_ao'], verbose=True)``
    """
    mem = Memory(cachedir=datadir)
    # Cache the expensive preprocessing steps on disk.
    readjson = mem.cache(json2taskdata.readjson, mmap_mode='r')
    create_task_data = mem.cache(json2taskdata.create_task_data)
    count_occurrances = mem.cache(json2taskdata.count_occurrances)
    count_labels = mem.cache(json2taskdata.count_labels)

    annotators = set()
    lectask = []
    # -------------------------------------------------------------------------
    # for each annotator in group tpl
    # -------------------------------------------------------------------------
    for stditem in tpl:
        aname = stditem.split('.')[0][3:][-2:]
        annotators.add(aname)
        lecdict = readjson(stditem)
        newlectask = create_task_data(lecdict, task_type=task_type, annotator=aname)
        label_data = json2taskdata.create_labels_list(newlectask)

        abscount = count_occurrances(str(label_data))
        with open(os.path.join(datadir, 'abscount-' + aname + '.yaml'), 'w') as f:
            yaml.dump(abscount, f)

        setcount = count_labels(newlectask)
        with open(os.path.join(datadir, 'setcount-' + aname + '.yaml'), 'w') as f:
            yaml.dump(setcount, f)

        lectask = lectask + newlectask

    task = AnnotationTask(data=lectask, distance=nltk.metrics.distance.masi_distance_mod)

    return {frozenset(annotators): Agree(task.kappa(), task.alpha(), task.avg_Ao())}
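# Design note: joblib.Memory memoizes each wrapped function on disk, keyed by
# its arguments, so repeated runs over the same JSON skip the parsing work.
# A minimal self-contained sketch (the path and function are illustrative, not
# from the original project; recent joblib versions use `location` where the
# older `cachedir` keyword seen above is deprecated):
from joblib import Memory

mem = Memory(location='/tmp/iaa-cache', verbose=0)


@mem.cache
def parse_json(path):
    import json
    with open(path) as f:
        return json.load(f)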
        if single_edge2 not in edges_current_annot:
            if (anonym_annot, single_edge2, dummy_label) not in iaa_data:  # to avoid duplicates
                iaa_data.append((anonym_annot, single_edge2, dummy_label))
        else:  # Disagreements on edge (and consequently also on label)
            if edge2 not in summed_results[annotator][text]:
                if (anonym_annot, edge2, dummy_label) not in iaa_data:  # to avoid duplicates
                    iaa_data.append((anonym_annot, edge2, dummy_label))
    return iaa_data


#text = "text3"
#annotators = ["beata", "elena", "julia"]  # "text3"
text = "text6"
annotators = ["beata", "julia", "mats"]  # "text6"
dummy_label = frozenset(["CORR"])
flexible = False
add_missing = False  # True = V1, False = V2

iaa_data = create_iaa_data(summed_results, text, annotators, dummy_label, flexible, add_missing)
# print(iaa_data[:3])

task = AnnotationTask(data=iaa_data, distance=jaccard_distance)
print("**** Inter-annotator agreement for", text, "****")
print("Avg agreement:\t\t\t\t", round(task.avg_Ao(), 3))          # Average observed agreement across all coders and items
print("Fleiss (multi_kappa):\t\t", round(task.multi_kappa(), 3))  # (Davies and Fleiss 1982)
print("Krippendorff's alpha:\t\t", round(task.alpha(), 3))        # (Krippendorff 1980)
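# Why frozensets: jaccard_distance compares label *sets*, so partial overlap
# between annotators counts as partial disagreement rather than all-or-nothing.
# A small self-contained demonstration:
from nltk.metrics.distance import jaccard_distance

print(jaccard_distance(frozenset(["CORR"]), frozenset(["CORR"])))           # 0.0, identical
print(jaccard_distance(frozenset(["CORR"]), frozenset(["CORR", "SPELL"])))  # 0.5, partial overlap
print(jaccard_distance(frozenset(["CORR"]), frozenset(["SPELL"])))          # 1.0, disjoint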
from nltk.metrics.agreement import AnnotationTask


def annotation(output):
    # Each line of the file is split into a (coder, item, label) triple.
    with open(output) as f:
        t = AnnotationTask(data=[x.split() for x in f])
    print("\nAverage observed agreement: " + str(t.avg_Ao()))
    print("\nKappa: " + str(t.kappa()))
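# Usage sketch: `output` is assumed to be a plain-text file holding one
# whitespace-separated (coder, item, label) triple per line, e.g.
#
#   c1 item1 A
#   c2 item1 A
#   c1 item2 B
#   c2 item2 A
#
# The file name below is hypothetical.
annotation('agreement_output.txt')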
            # number of raters != 2
            continue

        ata = align_annot_task(task)
        ata.sort(key=itemgetter(1))
        t = AnnotationTask(ata)
        same = 0
        diff = 0
        for key in set(row[1] for row in ata):
            r1, r2 = [row for row in ata if row[1] == key]
            if r1[2] == r2[2]:
                same += 1
            else:
                diff += 1
        print('- - - {} - - -'.format(label))
        print('Agreement on: {}/{}'.format(same, same + diff))
        print('Average observed agreement: {}'.format(t.avg_Ao()))
        print('Krippendorff\'s alpha: {}'.format(t.alpha()))
        if len(set(row[0] for row in task)) == 2:  # number of raters = 2
            type_arr1 = []
            type_arr2 = []
            att = align_annot_task(annot_task_type)
            att.sort(key=itemgetter(1))
            for key in set(row[1] for row in att):
                r1, r2 = [row for row in att if row[1] == key]
                type_arr1.append(r1[2])
                type_arr2.append(r2[2])
            cm = ConfusionMatrix(type_arr1, type_arr2)
            types = ['claim', 'ne', 'example', 'other']
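# A small self-contained sketch (toy label lists, not the original data) of
# how the ConfusionMatrix built above can be inspected; str(cm) renders an
# aligned reference-vs-test table, and cm.pretty_format() offers options such
# as sort_by_count and show_percents:
from nltk.metrics import ConfusionMatrix

ref = ['claim', 'ne', 'example', 'other', 'claim']
test = ['claim', 'ne', 'other', 'other', 'ne']
print(ConfusionMatrix(ref, test))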