def rater_score(filename, marks, incorr_files):
    """Compare human cluster ratings against our automatic marks."""
    human_raters = []
    auto_rate = []

    marks_dict = {}
    for sol, mark in marks.items():
        sol = sol.split('_')[-1][1:]
        marks_dict[sol] = mark

    cluster_dict = {}
    with open(filename, 'r') as fin:
        for line in fin:
            line = line.strip('\n')
            vals = line.split(',')
            sol = vals[0]
            cluster_vals = vals[1:]
            cluster_dict[sol] = [float(v) for v in cluster_vals]
            n_humans = len(cluster_vals)

    for _ in range(n_humans):
        human_raters.append([])

    for sol in incorr_files:
        sol = sol.split('/')[-1].split('_')[-1][1:]
        vals = cluster_dict[sol]
        for x in range(n_humans):
            human_raters[x].append(vals[x])
        auto_rate.append(marks_dict[sol])

    # Calculate agreement scores: rows are (coder, item, label) triples.
    tasks_humans = [[j, str(i), str(human_raters[j][i])]
                    for j in range(len(human_raters))
                    for i in range(len(human_raters[0]))]

    total_raters = human_raters + [auto_rate]
    tasks_auto = [[j, str(i), str(total_raters[j][i])]
                  for j in range(len(total_raters))
                  for i in range(len(total_raters[0]))]

    print("\n\nAgreement rate with only human ratings\n")
    ratingtask_humans = agreement.AnnotationTask(data=tasks_humans)
    print("kappa " + str(ratingtask_humans.kappa()))
    print("fleiss " + str(ratingtask_humans.multi_kappa()))
    print("alpha " + str(ratingtask_humans.alpha()))
    print("scotts " + str(ratingtask_humans.pi()))

    print("\n\nAgreement rate with our ratings included\n")
    ratingtask_auto = agreement.AnnotationTask(data=tasks_auto)
    print("kappa " + str(ratingtask_auto.kappa()))
    print("fleiss " + str(ratingtask_auto.multi_kappa()))
    print("alpha " + str(ratingtask_auto.alpha()))
    print("scotts " + str(ratingtask_auto.pi()))
def rater_agreement(rates):
    """Build an AnnotationTask from a list of raters' label sequences.

    Was hardcoded to range(0, 10) raters; now iterates over all of `rates`.
    """
    taskdata = []
    for j in range(len(rates)):
        taskdata += [[j, i, rates[j][i]] for i in range(len(rates[j]))]
    return agreement.AnnotationTask(data=taskdata)
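# A minimal usage sketch for rater_agreement() on toy data (the lists below
# are illustrative, not from the original project): each inner list holds one
# rater's labels, in the same item order for every rater.
example_rates = [
    [1, 0, 1, 1],  # rater 0
    [1, 1, 1, 0],  # rater 1
    [0, 0, 1, 1],  # rater 2
]
task = rater_agreement(example_rates)
print("alpha", task.alpha())  # Krippendorff's alpha across all raters
print("pi", task.pi())        # multi-coder Scott's pi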
def get_agreement(dict_list_out, collapse_relations=False, v=True, disable_kappa=False):
    agreement_dict = dict()

    if collapse_relations:
        dict_list_out = get_collapsed_relations(dict_list_out, collapse_relations)

    matrix = create_matrix(dict_list_out)
    ratingtask = agreement.AnnotationTask(data=matrix)
    alpha = ratingtask.alpha()
    prop = proportional_agreement_pairs(matrix)

    # Calculate kappa per file (not over the entire set), then average.
    total_kappa = 0.0
    data_by_file = sort_by_key(dict_list_out, ['completionurl'])
    for f, d_list in data_by_file.items():
        matrix = create_matrix(d_list)
        if not disable_kappa:
            kappa = get_average_kappa(matrix)
            if np.isnan(kappa):
                kappa = 0.0
            total_kappa += kappa

    if not disable_kappa and total_kappa != 0.0 and len(data_by_file) != 0:
        average_kappa = total_kappa / len(data_by_file)
    else:
        average_kappa = '-'

    if v:
        print(f"Krippendorff's alpha: {alpha}")
        print(f"Average Cohen's Kappa (pairwise): {average_kappa}")
        print(f"Proportional agreement (pairwise): {prop}")
        print()

    agreement_dict['Krippendorff'] = alpha
    agreement_dict['Proportional'] = prop
    agreement_dict['Av_Cohens_kappa'] = average_kappa
    return agreement_dict
def get_alpha(dict_list_out, collapse_relations=False):
    if collapse_relations:
        dict_list_out = get_collapsed_relations(dict_list_out, collapse_relations)
    matrix = create_matrix(dict_list_out)
    ratingtask = agreement.AnnotationTask(data=matrix)
    return ratingtask.alpha()
def scotts(rks, top_k):
    rks = [rk[:top_k] for rk in rks]
    taskdata = []
    for j, rk in enumerate(rks):
        taskdata += [[j, str(i), str(rk[i])] for i in range(len(rk))]
    ratingtask = agreement.AnnotationTask(data=taskdata)
    return ratingtask.pi()
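# A small sketch of scotts() on hypothetical rankings: each list gives item
# ids in rank order, and agreement is computed over the labels occupying the
# first top_k positions of each ranking.
example_rankings = [
    ["a", "b", "c", "d"],
    ["a", "c", "b", "d"],
]
print(scotts(example_rankings, top_k=3))  # pi over the first 3 positions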
def compute_alpha_old(primary_labler, classifier_folders, primary_folder,
                      secondary_folders, mode='relevant'):
    taskdata = []
    taskdata_counter = 0
    for query_folder_name in os.listdir(classifier_folders + primary_folder):
        if query_folder_name.endswith('.csv'):
            continue
        query_primary_full_path = classifier_folders + primary_folder + query_folder_name
        opinion_folders = {x: classifier_folders + y + '\\' + query_folder_name + '\\'
                           for x, y in secondary_folders.items()}
        for queries_file_name in os.listdir(query_primary_full_path):
            first_opinion_categories = get_opinion_categories(
                query_primary_full_path + '\\' + queries_file_name)
            other_classifications = get_opinions(opinion_folders=opinion_folders,
                                                 queries_file_name=queries_file_name)
            for url, row in first_opinion_categories.items():
                taskdata_counter += 1
                first_opinion_category = int(row['category'].strip())
                opinions = get_all_opinions(url, other_classifications)
                opinions[primary_labler] = first_opinion_category
                if mode == 'relevant':
                    ratings = {x: y for x, y in opinions.items() if y > 0}
                else:
                    ratings = opinions
                if len(ratings) < 2:
                    continue
                for name, value in ratings.items():
                    # Strip a trailing '2' so duplicate annotators share one id
                    # (matches compute_alpha; the stripped name was computed but
                    # unused in the old version).
                    cls_name = name[:-1] if name.endswith('2') else name
                    taskdata.append((cls_name, str(taskdata_counter), value))
    ratingtask = agreement.AnnotationTask(data=taskdata, distance=my_interval_distance)
    print("alpha " + str(ratingtask.alpha()))
def getKappa(self, taskdata):
    ratingtask = agreement.AnnotationTask(data=taskdata)
    # if float(ratingtask.kappa()) >= 0.4:
    #     print("kappa: %.2f" % ratingtask.kappa())
    #     return True
    print("fleiss: %.2f " % ratingtask.multi_kappa())
    print("alpha: %.2f " % ratingtask.alpha())
    print("scotts: %.2f " % ratingtask.pi())
    return float(ratingtask.kappa())
def compute_irr(self, dataset: IRRDataset):
    formatted_codes = []
    for i in range(dataset.coders):
        coder_data = dataset.get_coder(i)
        formatted_codes.extend([[i, n, coder_data[n]] for n in range(len(coder_data))])
    ratingtask = agreement.AnnotationTask(data=formatted_codes)
    return ratingtask.pi()
def krippendorffs_alpha(d1, d2, d3, task):
    def build_taskdata(r1, r2, r3):
        return ([[0, str(i), str(r1[i])] for i in range(len(r1))] +
                [[1, str(i), str(r2[i])] for i in range(len(r2))] +
                [[2, str(i), str(r3[i])] for i in range(len(r3))])

    if task == 'power':
        col = 'Power'
    elif task == 'agency':
        col = 'Agency'
    else:
        col = None

    if col is not None:
        taskdata = build_taskdata(d1[col].values.tolist(),
                                  d2[col].values.tolist(),
                                  d3[col].values.tolist())
        return str(agreement.AnnotationTask(data=taskdata).alpha())

    # Otherwise score object and subject annotations separately.
    rs1, rs2, rs3 = (d['Subj'].values.tolist() for d in (d1, d2, d3))
    ro1, ro2, ro3 = (d['Obj'].values.tolist() for d in (d1, d2, d3))
    taskdata_obj = build_taskdata(ro1, ro2, ro3)
    taskdata_subj = build_taskdata(rs1, rs2, rs3)  # fixed: was mixing ro1 with rs2/rs3
    alpha_obj = agreement.AnnotationTask(data=taskdata_obj).alpha()
    alpha_subj = agreement.AnnotationTask(data=taskdata_subj).alpha()
    return str(alpha_obj), str(alpha_subj)
def compute_alpha(queries_file, classifier_folders):
    q_to_cls = {}
    taskdata = []
    taskdata_counter = 0
    with open(queries_file, 'r', encoding='utf-8', newline='') as queries_csv:
        queries = csv.DictReader(queries_csv)
        for row in queries:
            query_folder_name = row['long query']
            label_url = row['pubmed']
            print(row['long query'])
            opinion_folders = {x: y + '\\' + query_folder_name + '\\'
                               for x, y in classifier_folders.items()}
            opinions = {}
            for name, folder in opinion_folders.items():
                if not os.path.exists(folder):
                    continue
                for queries_file_name in os.listdir(folder):
                    if not queries_file_name.endswith('.csv'):
                        continue
                    if queries_file_name not in opinions:
                        opinions[queries_file_name] = {}
                    annotations = get_opinion_categories(folder + queries_file_name)
                    for url, row in annotations.items():
                        if url == label_url:
                            if not row['category']:
                                continue
                            if query_folder_name not in q_to_cls:
                                q_to_cls[query_folder_name] = {}
                            cls_name = name[:-1] if name.endswith('2') else name
                            category = int(row['category'].strip())
                            q_to_cls[query_folder_name][cls_name] = category
                            if category > 0:
                                taskdata_counter += 1
                                taskdata.append((cls_name, url, category))

    ratingtask = agreement.AnnotationTask(data=taskdata, distance=my_interval_distance)
    print("alpha " + str(ratingtask.alpha()))

    for query, opinions in q_to_cls.items():
        counter = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}  # fixed: original had a duplicate `2: 0` key
        for classifier, opinion in opinions.items():
            counter[opinion] += 1
        sorted_stance = sorted(counter.items(), key=lambda kv: kv[1], reverse=True)
        if sorted_stance[0][1] == sorted_stance[1][1]:
            # No majority stance: report the tie.
            print(query)
            print(opinions)
def compute_inter_annotator_agreement(annotations):
    # First combine the instances across annotations.
    combined_annotations = dict()
    for index, q, a, r, label, assignment, worker, time in annotations:
        instance = (index, q, a, r, label)
        combined_annotations.setdefault(instance, list())
        combined_annotations[instance].append(assignment)

    # Convert combined_annotations to task data.
    task_data = list()
    for instance, assignments in combined_annotations.items():
        index, q, a, r, label = instance
        shuffle(assignments)
        for i, assignment in enumerate(assignments):
            task_data.append((str(i), str(index) + label, assignment))

    ratingtask = agreement.AnnotationTask(data=task_data)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
def rater_agreement_test():
    logging.info("Load agreement")
    from nltk import agreement
    logging.info("Start computing agreement")
    rater1 = [1, 1, 1]
    rater2 = [1, 1, 0]
    rater3 = [0, 1, 1]
    # taskdata format: [(coder1, item1, label), (coder1, item2, label),
    #                   (coder2, item1, label), (coder2, item2, label), ...]
    # All labels from coder1 first, then all labels from coder2, and so on.
    taskdata = ([[0, str(i), str(rater1[i])] for i in range(3)] +
                [[1, str(i), str(rater2[i])] for i in range(3)] +
                [[2, str(i), str(rater3[i])] for i in range(3)])
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
def main(input_file, delimiter=';'):
    data = []
    is_cohen = False
    is_fleiss = False
    with open(input_file, 'r', encoding='utf-8') as input_stream:
        reader = csv.DictReader(input_stream, delimiter=delimiter)
        n_annotators = len(reader.fieldnames[1:])
        if n_annotators == 1:
            raise ValueError('Only one annotator.')
        is_cohen = n_annotators == 2
        is_fleiss = not is_cohen
        for token_idx, line in enumerate(reader):
            annotations = [line[key] for key in reader.fieldnames[1:]]
            if len(annotations) != n_annotators:
                raise ValueError(
                    f"Wrong number of annotators for token {token_idx}. "
                    f"Expected: {n_annotators}, got: {len(annotations)}")
            if token_idx == 0:
                print(f"Annotator number: {n_annotators}")
            for i, annotation in enumerate(annotations):
                data.append([i, str(token_idx), int(annotation)])

    ratingtask = agreement.AnnotationTask(data=data, distance=binary_distance)
    print()
    if is_cohen:
        kappa_cohen = ratingtask.kappa()
        print(f"Cohen's κ: {kappa_cohen}")
        print(qualify_agreement(kappa_cohen))
    elif is_fleiss:
        kappa_fleiss = ratingtask.multi_kappa()
        print(f"Fleiss' κ: {kappa_fleiss}")
        print(qualify_agreement(kappa_fleiss))
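# A sketch of the CSV layout main() expects (header names are illustrative):
# the first column identifies the token, every remaining column is one
# annotator, and the delimiter defaults to ';'.
#
#   token;annotator_a;annotator_b
#   cat;1;1
#   dog;0;1
#
# main('annotations.csv')  # two annotator columns -> Cohen's kappa is reported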
def compute_fleiss_kappa(eval_pred, eval_true, num_categs, group=None):
    """
    eval_pred: a list of lists of all predictions for race or gender for each model
    eval_true: a list of lists of all ground-truth labels for race or gender,
               in the same order as the pred list, for each model
    num_categs: int count of all possible category labels (e.g., 2 for gender, 4 for race)
    group: an int describing the group (e.g. 0 for Female, 1 for Male, etc.)
           to compute the Fleiss kappa score over
    """
    # Earlier implementation built a subjects x categories count matrix and
    # called a fleiss_kappa helper directly:
    # num_subjects = len(np.where(np.array(eval_true)[0] == group)[0])
    # fleiss_inputs = np.zeros((num_subjects, num_categs))
    # for i, pred in enumerate(eval_pred):
    #     idx = 0
    #     for j, p in enumerate(pred):
    #         if eval_true[i][j] == group:
    #             fleiss_inputs[idx][p] += 1
    #             idx += 1
    # return fleiss_kappa(fleiss_inputs)
    fleiss_inputs = []
    for model, preds in enumerate(eval_pred):
        labels = []
        for example, pred in enumerate(preds):
            if group is None:
                labels.append([model + 1, example, pred])
            elif eval_true[model][example] == group:
                labels.append([model + 1, example, pred])
        fleiss_inputs.extend(labels)
    ratingtask = agreement.AnnotationTask(data=fleiss_inputs)
    return ratingtask.multi_kappa()
def krippendorff_metric_study(case_, additional_measures):
    data_raw = pd.read_csv("data/" + case_ + ".csv")
    data_vectors = {}
    rounds_ = data_raw['timeunit'].unique()
    delegates = data_raw['name'].unique()
    print("Loaded {rounds} sample rounds".format(rounds=len(rounds_)))
    print("Rounds contain {delegates} delegates".format(delegates=len(delegates)))

    data_new = pd.DataFrame(0, index=rounds_, columns=delegates)
    for round_ in rounds_:
        data_vectors[round_] = list(data_raw[data_raw['year'] == round_]['name'])
    for round_ in rounds_:
        for name_ in data_vectors[round_]:
            data_new[name_][round_] = 1

    alpha = ka.alpha(data_new)
    print("Krippendorff's alpha " + str(alpha))

    # TODO: test this
    if additional_measures:
        data_new_nltk = []
        for round_ in rounds_:
            for name_ in delegates:
                data_new_nltk.append([round_, name_, data_new[name_][round_]])
        ratingtask = agreement.AnnotationTask(data=data_new_nltk)
        print("kappa " + str(ratingtask.kappa()))
        print("fleiss " + str(ratingtask.multi_kappa()))
        print("scotts " + str(ratingtask.pi()))

    return alpha
def AnnotationStats(a1, a2, a3):
    exceptions = ["am", "are", "is", "were", "was", "been", "be", "being", "has",
                  "have", "had", "do", "done", "compared", "aware", "used", "said",
                  "overlap", "noted", "think", "Performing", "go", "seems", "uses",
                  "says", "raising", "appear", "received", "boost", "reciting",
                  "reprove", "responded", "leave"]

    def process_annotator(xml_path, csv_path, id_threshold):
        """Parse one annotator's XML, dump a per-verb CSV, and return the
        literal/nonliteral counts plus the shared verbs (id > id_threshold)
        and their mismatch tags."""
        tree = etree.parse(xml_path)
        tags = list(tree.iter())
        verbs = [e for e in tags if e.tag == 'VERB']
        lit = sum(1 for v in verbs if v.attrib['type'] == 'Literal')
        nonlit = len(verbs) - lit
        mm = [e for e in tags if e.tag == 'MISMATCH' and 'toID' in e.attrib]
        with open(csv_path, 'w') as f:
            writer = csv.writer(f, delimiter=",")
            writer.writerow(("Verb", "Type", "Mismatched Args"))
            for e in verbs:
                writer.writerow((e.attrib['text'], e.attrib['type'],
                                 [x.attrib['fromText'] for x in mm
                                  if int(x.attrib['toID'][1:]) == int(e.attrib['id'][1:])]))
        shared = {e: (e.attrib['spans'].split('~'), e.attrib['text'])
                  for e in tags
                  if e.tag == 'VERB' and int(e.attrib['id'][1:]) > id_threshold
                  and e.attrib['text'] not in exceptions}
        shared = sorted(shared, key=lambda x: shared[x][0][0])
        verbids = [e.attrib['id'] for e in shared]
        mismatches = [e for e in mm if e.attrib['toID'] in verbids]
        return lit, nonlit, shared, mismatches

    # The id thresholds (233, 192, 235) are file-specific cut-offs marking
    # where each annotator's shared section begins.
    lit1, nonlit1, shared1, mismatches1 = process_annotator(a1, 'Annotator1.csv', 233)
    lit2, nonlit2, shared2, mismatches2 = process_annotator(a2, 'Annotator2.csv', 192)
    lit3, nonlit3, shared3, mismatches3 = process_annotator(a3, 'Annotator3.csv', 235)
    literal = lit1 + lit2 + lit3
    nonliteral = nonlit1 + nonlit2 + nonlit3

    # Inter-annotator agreement over the shared verbs.
    verbentities = zip(shared1, shared2, shared3)
    tupls = []
    index = 0
    with open('IAACalculations.csv', 'w') as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(("Verb", "Annotator 1", "Annotator 2", "Annotator 3"))
        for e in verbentities:
            verb = e[0].attrib['text']
            type1 = e[0].attrib['type']
            type2 = e[1].attrib['type']
            type3 = e[2].attrib['type']
            tupls.extend([('a1', frozenset([verb]), frozenset([type1])),
                          ('a2', frozenset([verb]), frozenset([type2])),
                          ('a3', frozenset([verb]), frozenset([type3]))])
            writer.writerow((verb, type1, type2, type3))
            if type1 == 'Nonliteral' and type2 == 'Nonliteral' and type3 == 'Nonliteral':
                mismatch1 = [x.attrib['fromText'] for x in mismatches1
                             if x.attrib['toID'] == e[0].attrib['id']]
                mismatch2 = [x.attrib['fromText'] for x in mismatches2
                             if x.attrib['toID'] == e[1].attrib['id']]
                mismatch3 = [x.attrib['fromText'] for x in mismatches3
                             if x.attrib['toID'] == e[2].attrib['id']]
                tupls.extend([('a1', frozenset([index]), frozenset(mismatch1)),
                              ('a2', frozenset([index]), frozenset(mismatch2)),
                              ('a3', frozenset([index]), frozenset(mismatch3))])
                index += 1
                writer.writerow((verb, mismatch1, mismatch2, mismatch3))

    # Calculating IAA with MASI distance over the set-valued labels.
    t = agreement.AnnotationTask(data=tupls, distance=masi_distance)
    print("\nAnnotation Statistics")
    print("Literal tags: %d" % literal)
    print("Nonliteral tags: %d\n" % nonliteral)
    print("Agreement")
    print("Observed Agreement: %.2f" % t.avg_Ao())
    print("Alpha: %.2f" % t.alpha())
    print("S measure: %.2f" % t.S())
    print("Pi: %.2f" % t.pi())


if __name__ == "__main__":
    AnnotationStats('Annotator1Final.xml', 'Annotator2Final.xml', 'Annotator3Final.xml')
def get_scores():
    if not session.get('logged_in'):
        abort(401)
    scores = db_handler.get_scores(session['user']['last_dataset'],
                                   session['user']['last_annotation'])
    user_idx = -1
    for idx, val in enumerate(scores):
        if str(val['_id']) == session['user_id']:
            user_idx = idx
            break
    if user_idx == -1:
        return json_util.dumps({'results': []})

    annotations_scores = []
    for score in scores:
        annotations_scores.append(
            dict(zip(score['annotations'], [x[-1] for x in score['scores']])))

    def pairwise_alpha(a_scores, b_scores):
        # Item ids only need to be unique within a single task, so no
        # cross-category offset is needed (the original `index` bookkeeping
        # was a no-op: `index + len(...)` never updated the variable).
        data = ([[0, str(i), str(a_scores[i])] for i in range(len(a_scores))] +
                [[1, str(i), str(b_scores[i])] for i in range(len(b_scores))])
        return agreement.AnnotationTask(data=data).alpha()

    results = []
    for idx, val in enumerate(scores):
        if idx == user_idx:
            continue
        user = dict()
        scored_idx = set(annotations_scores[user_idx].keys()) & set(annotations_scores[idx].keys())
        a = [annotations_scores[user_idx][x] for x in scored_idx]
        b = [annotations_scores[idx][x] for x in scored_idx]
        scorer1 = {'histologicType': [], 'initialScore': [], 'mitosis': [], 'lumen': [], 'nuclear': []}
        scorer2 = {'histologicType': [], 'initialScore': [], 'mitosis': [], 'lumen': [], 'nuclear': []}
        for x, y in zip(a, b):
            scorer1['histologicType'].append(x['histologicType'])
            scorer2['histologicType'].append(y['histologicType'])
            scorer1['initialScore'].append(x['initialScore'])
            scorer2['initialScore'].append(y['initialScore'])
            scorer1['mitosis'].append(x['mitosis']['overall'])
            scorer2['mitosis'].append(y['mitosis']['overall'])
            scorer1['lumen'].append(x['lumen']['overall'])
            scorer2['lumen'].append(y['lumen']['overall'])
            scorer1['nuclear'].append(round(x['nuclear']['overall']))
            scorer2['nuclear'].append(round(y['nuclear']['overall']))
            for v, w in zip(x['mitosis']['data'], y['mitosis']['data']):
                scorer1['mitosis'].append(v['score'])
                scorer2['mitosis'].append(w['score'])
            for v, w in zip(x['lumen']['data'], y['lumen']['data']):
                scorer1['lumen'].append(v['score'])
                scorer2['lumen'].append(w['score'])
            for v, w in zip(x['nuclear']['data'], y['nuclear']['data']):
                scorer1['nuclear'].append(v['score'])
                scorer2['nuclear'].append(w['score'])

        histo_sim = pairwise_alpha(scorer1['histologicType'], scorer2['histologicType'])
        initial_sim = pairwise_alpha(scorer1['initialScore'], scorer2['initialScore'])
        mitosis_sim = pairwise_alpha(scorer1['mitosis'], scorer2['mitosis'])
        tubular_sim = pairwise_alpha(scorer1['lumen'], scorer2['lumen'])
        nuclear_sim = pairwise_alpha(scorer1['nuclear'], scorer2['nuclear'])
        total_sim = (histo_sim + initial_sim + mitosis_sim + tubular_sim + nuclear_sim) / 5

        user['users'] = val['users']
        user['similarity'] = {
            'histo_sim': histo_sim,
            'initial_sim': initial_sim,
            'mitosis_sim': mitosis_sim,
            'tubular_sim': tubular_sim,
            'nuclear_sim': nuclear_sim,
            'total_sim': total_sim
        }
        results.append(user)

    if len(scores) < 3:
        return json_util.dumps({'results': results})

    # With three or more scorers, also compute agreement over the
    # annotations that every scorer has completed.
    scored_idx = set()
    for idx, val in enumerate(scores):
        if len(scored_idx) == 0:
            scored_idx = set(val['annotations'])
        else:
            scored_idx &= set(val['annotations'])
    scorers = []
    for idx, val in enumerate(scores):
        scorer1 = {'histologicType': [], 'initialScore': [], 'mitosis': [], 'lumen': [], 'nuclear': []}
        a = [annotations_scores[idx][x] for x in scored_idx]
        for x in a:
            scorer1['histologicType'].append(x['histologicType'])
            scorer1['initialScore'].append(x['initialScore'])
            scorer1['mitosis'].append(x['mitosis']['overall'])
            scorer1['lumen'].append(x['lumen']['overall'])
            scorer1['nuclear'].append(round(x['nuclear']['overall']))
            for v in x['mitosis']['data']:
                scorer1['mitosis'].append(v['score'])
            for v in x['lumen']['data']:
                scorer1['lumen'].append(v['score'])
            for v in x['nuclear']['data']:
                scorer1['nuclear'].append(v['score'])
        scorers.append(scorer1)

    data_histo, data_initial, data_mitosis, data_tubular, data_nuclear = [], [], [], [], []
    for idx, s in enumerate(scorers):
        data_histo += [[idx, str(i), str(s['histologicType'][i])] for i in range(len(s['histologicType']))]
        data_initial += [[idx, str(i), str(s['initialScore'][i])] for i in range(len(s['initialScore']))]
        data_mitosis += [[idx, str(i), str(s['mitosis'][i])] for i in range(len(s['mitosis']))]
        data_tubular += [[idx, str(i), str(s['lumen'][i])] for i in range(len(s['lumen']))]
        data_nuclear += [[idx, str(i), str(s['nuclear'][i])] for i in range(len(s['nuclear']))]

    histo_sim = agreement.AnnotationTask(data=data_histo).alpha()
    initial_sim = agreement.AnnotationTask(data=data_initial).alpha()
    mitosis_sim = agreement.AnnotationTask(data=data_mitosis).alpha()
    tubular_sim = agreement.AnnotationTask(data=data_tubular).alpha()
    nuclear_sim = agreement.AnnotationTask(data=data_nuclear).alpha()
    total_sim = (histo_sim + initial_sim + mitosis_sim + tubular_sim + nuclear_sim) / 5

    total = [{
        'histo_sim': histo_sim,
        'initial_sim': initial_sim,
        'mitosis_sim': mitosis_sim,
        'tubular_sim': tubular_sim,
        'nuclear_sim': nuclear_sim,
        'total_sim': total_sim
    }]
    return json_util.dumps({'results': results, 'total': total})
def textanalyse(request):
    try:
        TaskID = request.GET['Task_ID']  # task id used to find the analysis
        lables = []
        classid = []
        t = Task.objects.all()
        for t1 in t:
            if t1.id == int(TaskID):
                # How many annotations are required for each instance of this task.
                noOfannotationneed = t1.requiredNumofAnnotations
        d1 = DataClass.objects.all()
        a1 = AnnotationDataSet.objects.all()
        ar = AnnotationDataSetresult.objects.all()
        noOdata = 0
        datainstance = []
        for j in d1:
            if j.taskID_id == int(TaskID):
                lables.append(j.cateogaryName)  # all category names (e.g. cat, dog, others)
                classid.append(j.cateogaryTag)  # all category tags (e.g. cat=0, dog=1, others=2)
        for k in a1:
            if k.taskID_id == int(TaskID):
                noOdata += 1  # count the instances in this task
                datainstance.append(k.id)  # collect all instance ids

        result1 = []
        annotator = []
        noOfannotation = []
        noOfannotationnum = 0
        task = []
        for l in datainstance:
            for lm in ar:
                if l == lm.DataInstance_id:  # this instance has an annotation result
                    # Record (annotator id, data id, annotation tag) for the agreement task.
                    task.append([lm.UserID, str(l), str(lm.ClassID)])
                    result1.append(lm.ClassID)
                    annotator.append(lm.UserID)
            for ml in a1:
                if l == ml.id:
                    noOfannotation.append(ml.NumberOfAnnotations)  # annotations done for this instance
                    noOfannotationnum += ml.NumberOfAnnotations    # total annotations done

        # Majority vote: for each instance, pick the class most annotators chose.
        qn = []
        lenth = 0
        for q in noOfannotation:
            co1 = []
            for co in classid:
                co1.append(result1[lenth:lenth + q].count(co))
            lenth += q
            if max(co1) == 0:
                qn.append('None')
            else:
                qn.append(lables[co1.index(max(co1))])

        m = []
        for data in lables:
            m.append(qn.count(data))  # instances per category in this task

        totneed = noOdata * noOfannotationneed  # annotations required for the whole task
        totdone = noOfannotationnum             # annotations finished so far
        process = "%.2f" % round((totdone / totneed) * 100, 2)  # progress percentage

        # Inter-annotator agreement via NLTK's AnnotationTask; each metric can
        # fail on sparse data, so fall back to a message per metric.
        r = []
        ratingtask = agreement.AnnotationTask(data=task)
        for metric in (ratingtask.kappa, ratingtask.multi_kappa,
                       ratingtask.alpha, ratingtask.pi):
            try:
                r.append(str("%.4f" % round(metric(), 4)))
            except Exception:
                r.append("Can't calculate just now with this result.")

        data = {
            'labels': lables,
            'data': m,
            'r': noOdata,
            'k': r[0],
            'f': r[1],
            'a': r[2],
            's': r[3],
            'process': process
        }
        return render(request, 'analyse/textdataanalyse.html', data)
    except Exception:
        return HttpResponse('Task Not Found')
net_average_scores = np.zeros(4)
for gen in gen2id.keys():
    taskdata = []
    points = 0
    cnt = 0
    for i, annotation in enumerate(annotations):
        for j in range(len(annotation)):
            taskdata.append([annotators[i], str(j), get_tag(annotation[j, gen2id[gen]])])
            points += annotation[j, gen2id[gen]]
            cnt += 1
    net_average_scores[gen2id[gen]] = points / cnt
    ratingtask = agreement.AnnotationTask(data=taskdata)
    cohen_kappa = ratingtask.kappa()
    fleiss_kappa = ratingtask.multi_kappa()
    cohen_kappas[gen] = cohen_kappa
    fleiss_kappas[gen] = fleiss_kappa

print(net_average_scores)
# The print arguments below were redacted in the source; printing the
# just-populated kappa dicts is a reconstruction.
print("Cohen Kappas: " + str(cohen_kappas))
print("--------------------------")
print("Fleiss Kappas: " + str(fleiss_kappas))
print("--------------------------")

with open(os.path.join(PICKLE_PATH, args.dataset, 'cohen.pkl'), 'wb') as pk:
    pickle.dump(cohen_kappas, pk)
def merge_google_labels(dir_name, classifiers):
    labels = {}
    for (name, file) in classifiers:
        path = dir_name + file
        with open(path, 'r', encoding='utf-8', newline='') as label_file:
            label_file_dict = csv.DictReader(label_file)
            for row in label_file_dict:
                q = row['short query']
                value = int(row['Google_value'])
                if q not in labels:
                    labels[q] = {}
                    labels[q]['short query'] = q
                    labels[q][name] = value
                    if 'query' in row:
                        labels[q]['long query'] = row['query']
                else:
                    assert name not in labels[q]
                    labels[q][name] = value
                    if 'query' in row:
                        assert labels[q]['long query'] == row['query']

    with open(dir_name + 'google_labels_merged.csv', 'w', encoding='utf-8', newline='') as merged:
        fieldnames = ['short query', 'long query', 'Google']
        names = [x[0] for x in classifiers]
        fieldnames.extend(names)
        merged_writer = csv.DictWriter(merged, fieldnames=fieldnames)
        merged_writer.writeheader()
        taskdata = []
        taskdata_counter = 0
        for row in labels.values():
            taskdata_counter += 1
            barak_label = row['barak']
            for name in names:
                if name not in row:
                    print('no label for ' + name + ' for query ' + row['short query'])
                    row[name] = 0
                if barak_label == -1:
                    if name in row and row[name] > 0:
                        print('Error')
                        print(name)
                        print(row['short query'])
                else:
                    rating = row[name] if row[name] > 0 else 0
                    taskdata.append((name, str(taskdata_counter), rating))
            # Majority vote over the classifier values; -10 marks a
            # three-way disagreement.
            values = [row[x] for x in names]
            values.sort()
            if values[1] == values[0]:
                row['Google'] = values[0]
            elif values[1] == values[2]:
                row['Google'] = values[2]
            else:
                row['Google'] = -10
                print('DISAGREEMENT on ' + row['short query'])
            merged_writer.writerow(row)

    ratingtask = agreement.AnnotationTask(data=taskdata, distance=interval_distance)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
cohen_kappa_score(list(result['best_w2v']), list(result['best_ft']))
cohen_kappa_score(list(result['best_ft']), list(result['best_glove']))

#%%
from nltk import agreement

# We obtain Fleiss' kappa between the three models.
coder1 = list(result['best_w2v'])
coder2 = list(result['best_ft'])
coder3 = list(result['best_glove'])
formatted_codes = ([[1, i, coder1[i]] for i in range(len(coder1))] +
                   [[2, i, coder2[i]] for i in range(len(coder2))] +
                   [[3, i, coder3[i]] for i in range(len(coder3))])
ratingtask = agreement.AnnotationTask(data=formatted_codes)
print("Fleiss's Kappa:", ratingtask.multi_kappa())
print("Krippendorff's alpha:", ratingtask.alpha())
print("Scott's pi:", ratingtask.pi())

# percentage_agreement = (len(result[(result['best_w2v'] == result['best_ft'])]) / len(result)) * 100
# print('Percentage of agreement:', percentage_agreement)

r = result['best_w2v_value'] - result['best_glove_value']
result.head()
np.array(result['best_ft_value'])

"""Observed output:
Fleiss's Kappa: 0.624885205206788
Krippendorff's alpha: 0.6242742329246845
"""
def multi_kappa(data):
    ratingtask = agreement.AnnotationTask(data=data)
    return ratingtask.multi_kappa()
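# A minimal sketch of multi_kappa() on toy data: rows are (coder, item, label)
# triples, which is the format agreement.AnnotationTask expects.
example_data = [(0, "i1", "yes"), (0, "i2", "no"), (0, "i3", "yes"),
                (1, "i1", "yes"), (1, "i2", "yes"), (1, "i3", "yes"),
                (2, "i1", "yes"), (2, "i2", "no"), (2, "i3", "no")]
print(multi_kappa(example_data))  # Davies-Fleiss multi-coder kappa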
def analyze_interrater_reliability():
    data_folders = [
        'phase_2/user4_204_20210303-133107-9152',
        'phase_2/user1_204_20210301-141046-2142',
        'phase_2/user1_204_20210301-143153-9220',
        'phase_2/user5_204_20210302-142915-8332',
        'phase_2/user5_204_20210302-145812-8332',
        'phase_2/user2_204_20210308-080302-9152',
        'phase_2/user3_204_20210311-101017-9152',
        'phase_2/user3_204_20210311-121106-2142'
    ]
    labels_table = defaultdict(dict)
    chest_table = defaultdict(dict)
    box_table = defaultdict(dict)
    certainty_box_table = defaultdict(dict)
    index_box_table = defaultdict(dict)
    results_csv = pd.DataFrame()

    for data_folder in data_folders:
        print(data_folder)
        user = data_folder.split('/')[-1].split('_')[0]
        answers, results_csv, all_coord_chest, all_coord_box, all_certainty_box, all_index_box = get_classes_from_csv(
            '../anonymized_collected_data/' + data_folder + '/structured_output.csv',
            results_csv, user,
            '../anonymized_collected_data/phase_2/phase_2_transcriptions_anon.csv')
        labels_table[user].update(answers)
        chest_table[user].update(all_coord_chest)
        box_table[user].update(all_coord_box)
        certainty_box_table[user].update(all_certainty_box)
        index_box_table[user].update(all_index_box)

    # Build a users x trials x labels array of answers.
    user_lists = []
    for user in sorted(labels_table.keys()):
        trial_lists = []
        trials_list = sorted(labels_table[user].keys())
        for trial in trials_list:
            labels_list = sorted(labels_table[user][trial].keys())
            label_lists = []
            for label in labels_list:
                label_lists.append(labels_table[user][trial][label])
                new_row = {'user': user, 'label': label, 'trial': trial,
                           'title': 'trial_answer',
                           'value': labels_table[user][trial][label]}
                results_csv = results_csv.append(new_row, ignore_index=True)
                new_row = {'user': user, 'label': label, 'trial': trial,
                           'title': 'trial_answer_present',
                           'value': (labels_table[user][trial][label] != 0) * 1}
                results_csv = results_csv.append(new_row, ignore_index=True)
            trial_lists.append(label_lists)
        user_lists.append(trial_lists)

    full_array = numpy.asarray(user_lists)
    print(full_array.shape)
    # Binarize: anything negative or >= 3 counts as "present".
    full_array = numpy.logical_or(full_array < 0, full_array >= 3)
    skip_index = labels_list.index('Skip')
    use_trials = np.ones(full_array.shape[1], dtype=bool)
    disease_labels_array = full_array[:, use_trials, :]
    print(disease_labels_array.shape)

    for k in range(len(labels_list)):
        array_to_use = disease_labels_array
        trials_to_use = range(1, full_array.shape[1] + 1)
        print(np.sum(array_to_use[:, :, k]))
        print(np.sum(array_to_use[:, :, k], 1))
        print(np.sum(array_to_use[:, :, k], 0))
        formatted_codes = [[j, i, array_to_use[j, i, k]]
                           for i in range(array_to_use.shape[1])
                           for j in range(array_to_use.shape[0])]
        ratingtask = agreement.AnnotationTask(data=formatted_codes)
        try:
            value = ratingtask.multi_kappa()
        except ZeroDivisionError:
            value = None
        new_row = {'user': '******', 'label': labels_list[k], 'trial': 'all',
                   'title': 'Fleiss Kappa', 'value': value}
        results_csv = results_csv.append(new_row, ignore_index=True)

        # Leave-one-trial-out Fleiss kappa.
        for trial_index, trial in enumerate(trials_to_use):
            formatted_codes_except = [[j, i, array_to_use[j, i, k]]
                                      for i in range(array_to_use.shape[1])
                                      for j in range(array_to_use.shape[0])
                                      if i != trial_index]
            ratingtask = agreement.AnnotationTask(data=formatted_codes_except)
            try:
                value = ratingtask.multi_kappa()
            except ZeroDivisionError:
                value = None
            new_row = {'user': '******', 'label': labels_list[k], 'trial': trial,
                       'title': 'Fleiss Kappa (except trial)', 'value': value}
            results_csv = results_csv.append(new_row, ignore_index=True)

            # Leave-one-trial-and-one-user-out Fleiss kappa.
            for user_index, user in enumerate(sorted(labels_table.keys())):
                formatted_codes_except = [[j, i, array_to_use[j, i, k]]
                                          for i in range(array_to_use.shape[1])
                                          for j in range(array_to_use.shape[0])
                                          if i != trial_index and j != user_index]
                ratingtask = agreement.AnnotationTask(data=formatted_codes_except)
                try:
                    value = ratingtask.multi_kappa()
                except ZeroDivisionError:
                    value = None
                new_row = {'user': user, 'label': labels_list[k], 'trial': trial,
                           'title': 'Fleiss Kappa (except trial and user)', 'value': value}
                results_csv = results_csv.append(new_row, ignore_index=True)

    # Per-user comparisons against the remaining raters.
    for user_index, user in enumerate(sorted(labels_table.keys())):
        for k in range(len(labels_list)):
            if labels_list[k] == 'Skip':
                array_to_use = full_array
            else:
                array_to_use = disease_labels_array
            formatted_codes_except = [[j, i, array_to_use[j, i, k]]
                                      for i in range(array_to_use.shape[1])
                                      for j in range(array_to_use.shape[0])
                                      if j != user_index]
            formatted_codes = [[0, i, array_to_use[user_index, i, k] * 1.]
                               for i in range(array_to_use.shape[1])]
            ratingtask = agreement.AnnotationTask(data=formatted_codes_except)
            try:
                value = ratingtask.multi_kappa()
            except ZeroDivisionError:
                value = None
            new_row = {'user': user, 'label': labels_list[k], 'trial': 'all',
                       'title': 'Fleiss Kappa (except user)', 'value': value}
            results_csv = results_csv.append(new_row, ignore_index=True)

            formatted_codes_atleast1 = [[1, i, (sum(numpy.delete(array_to_use[:, i, k], user_index, 0)) >= 1) * 1.]
                                        for i in range(array_to_use.shape[1])]
            results_csv = calculate_per_user('atleast1', results_csv, formatted_codes,
                                             formatted_codes_atleast1, labels_list[k], user)
            formatted_codes_atleast2 = [[1, i, (sum(numpy.delete(array_to_use[:, i, k], user_index, 0)) >= 2) * 1.]
                                        for i in range(array_to_use.shape[1])]
            results_csv = calculate_per_user('atleast2', results_csv, formatted_codes,
                                             formatted_codes_atleast2, labels_list[k], user)
            formatted_codes_atleast3 = [[1, i, (sum(numpy.delete(array_to_use[:, i, k], user_index, 0)) >= 3) * 1.]
                                        for i in range(array_to_use.shape[1])]
            results_csv = calculate_per_user('atleast3', results_csv, formatted_codes,
                                             formatted_codes_atleast3, labels_list[k], user)

            for user_index_pair, user_pair in enumerate(sorted(labels_table.keys())):
                if user_index_pair != user_index:
                    formatted_codes_pair = [[1, i, array_to_use[user_index_pair, i, k] * 1.]
                                            for i in range(array_to_use.shape[1])]
                    results_csv = calculate_per_user('pair', results_csv, formatted_codes,
                                                     formatted_codes_pair, labels_list[k],
                                                     user, user_pair)

            formatted_codes_majority = [[1, i, ((array_to_use[:, i, k]).sum() > 2.5) * 1.]
                                        for i in range(array_to_use.shape[1])]
            results_csv = calculate_per_user('majority', results_csv, formatted_codes,
                                             formatted_codes_majority, labels_list[k], user)

    # Bounding-box annotations: record certainties, coordinates, and pairwise IoU.
    ious_box = []
    for trial in trials_list:
        if use_trials[int(trial) - 1]:
            for k in range(len(labels_list)):
                for user_index, user in enumerate(sorted(labels_table.keys())):
                    if trial in box_table[user].keys():
                        list_of_boxes = box_table[user][trial][labels_list[k]]
                        list_of_certainties = certainty_box_table[user][trial][labels_list[k]]
                        list_of_indexes_boxes = index_box_table[user][trial][labels_list[k]]
                        for l in range(len(list_of_boxes)):
                            new_row = {'user': user, 'extra_info': list_of_indexes_boxes[l],
                                       'label': labels_list[k], 'trial': trial,
                                       'title': 'BBox (Ellipse) certainty',
                                       'value': list_of_certainties[l]}
                            results_csv = results_csv.append(new_row, ignore_index=True)
                            for coord_index, coord in enumerate(
                                    convert_center_axis_to_corners(list_of_boxes[l])):
                                new_row = {'user': user, 'extra_info': list_of_indexes_boxes[l],
                                           'label': labels_list[k], 'trial': trial,
                                           'title': 'BBox (Ellipse) coord ' + str(coord_index),
                                           'value': coord}
                                results_csv = results_csv.append(new_row, ignore_index=True)
                        # IoU of this label's boxes for every pair of users.
                        for user_index_2, user_2 in enumerate(sorted(labels_table.keys())):
                            if user_index_2 != user_index:
                                if trial in box_table[user].keys() and trial in box_table[user_2].keys():
                                    if (len(box_table[user][trial][labels_list[k]]) > 0 and
                                            len(box_table[user_2][trial][labels_list[k]]) > 0):
                                        value = get_iou(box_table[user][trial][labels_list[k]],
                                                        box_table[user_2][trial][labels_list[k]],
                                                        create_ellipse)
                                        new_row = {'user': user, 'extra_info': user_2,
                                                   'label': labels_list[k], 'trial': trial,
                                                   'title': 'IoU box (pair)', 'value': value}
                                        results_csv = results_csv.append(new_row, ignore_index=True)

    # Chest-box annotations: coordinates and pairwise IoU per trial.
    ious_chest = []
    for user_index, user in enumerate(sorted(labels_table.keys())):
        ious_this_user = []
        for trial in trials_list:
            if use_trials[int(trial) - 1]:
                for coord_index, coord in enumerate(chest_table[user][trial]):
                    new_row = {'user': user, 'extra_info': '', 'label': '', 'trial': trial,
                               'title': 'ChestBox (Rectangle) coord ' + str(coord_index),
                               'value': coord}
                    results_csv = results_csv.append(new_row, ignore_index=True)
                for user_index_2, user_2 in enumerate(sorted(labels_table.keys())):
                    if user_index_2 != user_index:
                        ious_this_user.append(get_iou([chest_table[user][trial]],
                                                      [chest_table[user_2][trial]],
                                                      create_box))
                        new_row = {'user': user, 'extra_info': user_2, 'label': '',
                                   'trial': trial, 'title': 'IoU chest (pair)',
                                   'value': ious_this_user[-1]}
                        results_csv = results_csv.append(new_row, ignore_index=True)
        # new_row = {'user': user, 'label': '', 'trial': 'all',
        #            'title': 'IoU chest (pair, by user)',
        #            'value': sum(ious_this_user) / len(ious_this_user)}
        # results_csv = results_csv.append(new_row, ignore_index=True)
        # ious_chest += ious_this_user
    # new_row = {'user': '******', 'label': '', 'trial': 'all', 'title': 'IoU chest (pair)',
    #            'value': sum(ious_chest) / len(ious_chest)}
    # results_csv = results_csv.append(new_row, ignore_index=True)

    results_csv.to_csv('results_phase_2.csv', index=False)
def generate_mapping():
    # https://stackoverflow.com/questions/11528150/inter-rater-agreement-in-python-cohens-kappa
    from nltk import agreement

    map_same_hit = defaultdict(list)
    with open('Batch_3159448_batch_results.csv', 'r', encoding='utf-8') as csv_file:
        csr_reader = csv.DictReader(csv_file)
        for row in csr_reader:
            map_same_hit[row['HITId']].append(row)

    agreement_data = []
    mapping = mapping_data()
    for hit_id, hits in map_same_hit.items():
        source_url = hits[0]['Input.domain_src']
        source_domain = source_url[7:source_url.index('.wikia.com')]
        dst_1_url = hits[0]['Input.domain_dst1']
        dst_1_domain = dst_1_url[7:dst_1_url.index('.wikia.com')]
        dst_2_url = hits[0]['Input.domain_dst2']
        dst_2_domain = dst_2_url[7:dst_2_url.index('.wikia.com')]
        if len(hits) == 5:
            print("Hit")
            for i in range(10):
                answers_dst_1, majority_dst_1, worker_ids_1 = get_checked_answers(
                    hits, i, 1, dst_1_url + '/wiki/')
                # Use an auto-incremented id for each worker within each task;
                # to use the mturk worker ids instead:
                # agreement_data.extend([(worker_ids_1[j], hit_id + '_' + str(i), answers_dst_1[j]) for j in range(5)])
                agreement_data.extend([(str(j), hit_id + '_' + str(i), answers_dst_1[j])
                                       for j in range(5)])
                answers_dst_2, majority_dst_2, worker_ids_2 = get_checked_answers(
                    hits, i, 2, dst_2_url + '/wiki/')

                resource_src = ('http://dbkwik.webdatacommons.org/' + source_domain +
                                '/resource/' + hits[0]['Input.wiki_{}'.format(i)])
                if majority_dst_1 != "no match":
                    resource_dst_1 = get_dbkwik_uri_destination(majority_dst_1)
                    mapping.add_mapping(source_domain, resource_src, dst_1_domain, resource_dst_1, '=')
                else:
                    mapping.add_mapping(source_domain, resource_src, dst_1_domain, 'null', '%')
                if majority_dst_2 != "no match":
                    resource_dst_2 = get_dbkwik_uri_destination(majority_dst_2)
                    mapping.add_mapping(source_domain, resource_src, dst_2_domain, resource_dst_2, '=')
                else:
                    mapping.add_mapping(source_domain, resource_src, dst_2_domain, 'null', '%')

                # Transitivity between the two destination wikis.
                if majority_dst_1 != "no match" and majority_dst_2 != "no match":
                    mapping.add_mapping(dst_1_domain, resource_dst_1, dst_2_domain, resource_dst_2, '=')
                if majority_dst_1 != "no match" and majority_dst_2 == "no match":
                    mapping.add_mapping(dst_1_domain, resource_dst_1, dst_2_domain, 'null', '%')
                if majority_dst_1 == "no match" and majority_dst_2 != "no match":
                    mapping.add_mapping(dst_2_domain, resource_dst_2, dst_1_domain, 'null', '%')

    agreement_data = sorted(agreement_data, key=lambda x: (x[0], x[1]))
    ratingtask = agreement.AnnotationTask(data=agreement_data)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))

    # Add schema-level mappings.
    for schema_file in glob.glob('./schema/*'):
        file_split = os.path.basename(schema_file).split('~')
        domain_one = file_split[0]
        domain_two = file_split[1]
        with open(schema_file, 'rb') as f:
            for s, p, o in parse(f):
                if s.value == 'null':
                    mapping.add_mapping(domain_one, 'null', domain_two, o.value, '%')
                if o.value == 'null':
                    mapping.add_mapping(domain_one, s.value, domain_two, 'null', '%')
                if s.value != 'null' and o.value != 'null':
                    mapping.add_mapping(domain_one, s.value, domain_two, o.value, '=')

    for (src_domain, dst_domain), mapping in mapping.items():
        serialize_mapping_to_file(
            './gold/' + src_domain + '~' + dst_domain + "~evaluation.xml",
            mapping,
            (src_domain, 'http://' + src_domain + '.wikia.com'),
            (dst_domain, 'http://' + dst_domain + '.wikia.com'))
def calculate_per_user(name, results_csv, formatted_codes, formatted_codes_other,
                       label, user, user2=None):
    try:
        value_roc = roc_auc_score(
            np.array([row[2] for row in formatted_codes_other]),
            np.array([row[2] for row in formatted_codes]))
    except ValueError:
        value_roc = None
    new_row = {'user': user, 'extra_info': user2, 'label': label, 'trial': 'all',
               'title': 'AUC (' + name + ')', 'value': value_roc}
    results_csv = results_csv.append(new_row, ignore_index=True)

    arg_1 = np.array([row[2] for row in formatted_codes_other])
    arg_2 = np.array([row[2] for row in formatted_codes])
    # Degenerate cases where confusion_matrix would not return four cells.
    if sum(arg_1) == 0 and sum(arg_2) == 0:
        tn = len(arg_1)
        tp = 0
        fp = 0
        fn = 0
    elif sum(arg_1) == len(arg_1) and sum(arg_2) == len(arg_2):
        tn = 1  # kept from the original; avoids a zero specificity denominator
        tp = len(arg_1)
        fp = 0
        fn = 0
    else:
        tn, fp, fn, tp = confusion_matrix(arg_1, arg_2).ravel()

    specificity = tn / (tn + fp) if tn + fp != 0 else None
    new_row = {'user': user, 'extra_info': user2, 'label': label, 'trial': 'all',
               'title': 'Specificity (' + name + ')', 'value': specificity}
    results_csv = results_csv.append(new_row, ignore_index=True)

    recall = tp / (tp + fn) if tp + fn != 0 else None
    new_row = {'user': user, 'extra_info': user2, 'label': label, 'trial': 'all',
               'title': 'Recall (' + name + ')', 'value': recall}
    results_csv = results_csv.append(new_row, ignore_index=True)

    precision = tp / (tp + fp) if tp + fp != 0 else None
    new_row = {'user': user, 'extra_info': user2, 'label': label, 'trial': 'all',
               'title': 'Precision (' + name + ')', 'value': precision}
    results_csv = results_csv.append(new_row, ignore_index=True)

    ratingtask = agreement.AnnotationTask(data=formatted_codes_other + formatted_codes)
    try:
        value = ratingtask.multi_kappa()
    except ZeroDivisionError:
        value = None
    new_row = {'user': user, 'extra_info': user2, 'label': label, 'trial': 'all',
               'title': 'Fleiss Kappa (' + name + ')', 'value': value}
    results_csv = results_csv.append(new_row, ignore_index=True)
    return results_csv