def getInterraterReliabilityWithinGroup(group_list,df_annotations,shared_comments):
    """Return one nominal Krippendorff's alpha per entry of ``group_list``.

    ``df_annotations`` is pivoted to a ``rev_id`` x ``worker_id`` table of the
    ``attack`` column and restricted to the rows whose ``rev_id`` appears in
    ``shared_comments['rev_id']``.

    The first alpha (the "baseline") is computed over every worker column that
    has at least one non-NaN rating; ``group_list[0]`` itself is not read.
    Each following alpha uses only the workers listed in ``group_list[i]``.
    A group without any worker contributes the fixed value ``0.6``.

    Side effect: prints the number of shared comments and the number of
    baseline raters.
    """
    pivot = pd.pivot_table(df_annotations,
                           values='attack',
                           index=['rev_id'],
                           columns=['worker_id'],
                           aggfunc=np.sum)
    shared = pivot[pivot.index.isin(shared_comments['rev_id'])]

    def ratings_of(rater):
        # Column labels are looked up as ints, exactly like the group members.
        return shared[int(rater)].tolist()

    def nominal_alpha(rows):
        return krippendorff.alpha(reliability_data=rows,
                                  level_of_measurement='nominal')

    # Baseline: all raters that rated at least one shared comment.
    baseline_rows = []
    for rater in shared.columns:
        row = ratings_of(rater)
        if np.any(~np.isnan(row)):
            baseline_rows.append(row)

    print(len(shared))
    print(len(baseline_rows))

    alphas = [nominal_alpha(baseline_rows)]

    # Remaining groups: only the raters explicitly listed for the group.
    for group in group_list[1:]:
        group_rows = [ratings_of(rater) for rater in group]
        alphas.append(nominal_alpha(group_rows) if group_rows else 0.6)

    return alphas
def main(argv):
    """Print Krippendorff's alpha before and after dropping failing annotators.

    Reads the reliability matrix from ``FLAGS.input_file_path`` via
    ``get_reliability_matrix`` and prints alpha for the full matrix. Every
    annotator whose failure count is at least ``FLAGS.num_failing`` is then
    removed and alpha is printed again, followed by the removed annotators.

    Raises:
        app.UsageError: if more than one command-line argument is given.
        ValueError: if the input file does not exist.
    """
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")
    if not os.path.exists(FLAGS.input_file_path):
        raise ValueError("No data found at %s" % FLAGS.input_file_path)

    reliability_matrix, annotators_to_idx, failed_count = get_reliability_matrix(
        FLAGS.input_file_path)
    alpha = krippendorff.alpha(reliability_matrix)
    print("Alpha without removing annotators: %.8f" % alpha)

    # Remove annotators with too many failures.
    failed_annotators = [
        annotator for annotator, count in failed_count.items()
        if count >= FLAGS.num_failing
    ]
    # Delete all rows in a single call: annotators_to_idx refers to row
    # positions in the ORIGINAL matrix, so deleting one row at a time would
    # shift the remaining indices and remove the wrong annotators.
    rows_to_remove = [annotators_to_idx[annotator]
                      for annotator in failed_annotators]
    reliability_matrix = np.delete(reliability_matrix, rows_to_remove, axis=0)

    alpha = krippendorff.alpha(reliability_matrix)
    print("Alpha with removing annotators: %.8f" % alpha)
    print("Removed annotators: ", failed_annotators)
def _report_krippendorff(sce_path, wo_attention_check, bad_annotators_path,
                         dataset):
    """Print the per-threshold alpha report to the current ``sys.stdout``."""
    rows = read_csv(sce_path, dataset=dataset)
    bad_annotators_per_th = get_bad_annotators(bad_annotators_path)
    for th, bad_annotators in bad_annotators_per_th.items():
        print(f'--- Threshold {th}---')
        annotations = get_annotations_per_annotators(
            rows,
            wo_attention_check=wo_attention_check,
            wo_annotator=bad_annotators)
        print('- After filtering: -')
        print_annotation_statistics(annotations)

        ratings_per_annotator = get_annotator_tab(annotations)
        # Falsy ratings are treated as missing; everything else is cast to int.
        data = [[np.nan if not r else int(r) for r in ratings]
                for ratings in ratings_per_annotator]
        print(
            "Krippendorff's alpha for nominal metric: ",
            krippendorff.alpha(reliability_data=data,
                               level_of_measurement='nominal'))
        print("Krippendorff's alpha for interval metric: ",
              krippendorff.alpha(reliability_data=data))
        print(
            "Krippendorff's alpha for ordinal metric: ",
            krippendorff.alpha(reliability_data=data,
                               level_of_measurement='ordinal'))

        # Same measure computed with the nltk library.
        task_data = annotations2task_data(annotations)
        rating_task = AnnotationTask(data=task_data, distance=ordinal)
        print("Krippendorff's alpha for ordinal metric (nltk): ",
              rating_task.alpha())


def compute_krippendorff(sce_path,
                         output_path='',
                         wo_attention_check=False,
                         bad_annotators_path='',
                         dataset=''):
    """
    Compute Krippendorff's alpha with krippendorff library
    (https://github.com/pln-fing-udelar/fast-krippendorff/blob/master/sample.py)

    The output file, when given, is now closed and ``sys.stdout`` is restored
    once the report has been written (previously stdout stayed redirected to
    a file handle that was never closed).

    :param sce_path: csv file with columns UID, ANSWER, ANNOTATOR
    :param output_path: path of the output file where the results will be
        printed (if empty string the results are printed in the standard
        output)
    :param wo_attention_check: if True remove the attention check when
        computing alpha
    :param bad_annotators_path: path of the pkl file containing for each
        threshold the list of 'bad' annotators. For each threshold remove the
        annotations of the annotators listed when computing alpha. If empty
        string no annotator's annotation is removed.
    :param dataset: alphanumeric characters identifying the corpus to compute
        the alpha (if empty string the alpha is computed with annotation from
        all corpora and from attention check)
    """
    import contextlib

    if output_path:
        with open(output_path, "w") as out, contextlib.redirect_stdout(out):
            _report_krippendorff(sce_path, wo_attention_check,
                                 bad_annotators_path, dataset)
    else:
        _report_krippendorff(sce_path, wo_attention_check,
                             bad_annotators_path, dataset)
def compute_reliability(date_scores):
    """Print ordinal and pairwise interval alpha for every annotated timeline.

    ``date_scores`` maps an annotator to a mapping keyed by
    ``(topic, tl_name)`` whose values map dates to scores. For each timeline
    found under ``./gold-timelines/<topic>/<tl_name>.txt`` the scores of every
    annotator are converted to dense ranks (highest score -> rank 1, equal
    scores share a rank, dates without a score count as 0). The rank matrix
    (annotators x dates) is scored with ordinal alpha, then every pair of
    annotators is scored with interval alpha.
    """
    base_dir = Path("./gold-timelines")
    all_annotators = sorted(date_scores)

    all_topics = {(topic, tl_name)
                  for scores in date_scores.values()
                  for topic, tl_name in scores}

    def dense_ranks(dates, scores):
        # Walk the dates from highest to lowest score, bumping the rank each
        # time the score changes.
        ranks = {}
        rank = 0
        last_score = None
        ordered = sorted(dates, key=lambda d: scores.get(d, 0), reverse=True)
        for d in ordered:
            current = scores.get(d, 0)
            if last_score is None or last_score != current:
                rank += 1
                last_score = current
            ranks[d] = rank
        return ranks

    for topic, tl_name in all_topics:
        timeline_path = base_dir / topic / (tl_name + ".txt")
        with open(timeline_path, errors="ignore") as f:
            tl = Timeline.from_file(f)

        score_matrix = np.zeros((len(date_scores), len(tl.get_dates())))
        all_dates = sorted(tl.get_dates())

        for row, annotator in enumerate(all_annotators):
            ranks = dense_ranks(all_dates,
                                date_scores[annotator][(topic, tl_name)])
            for col, date in enumerate(all_dates):
                score_matrix[row, col] = ranks.get(date, 0)

        print(topic, tl_name,
              k.alpha(score_matrix, level_of_measurement="ordinal"))

        for first, second in it.combinations(range(len(all_annotators)), 2):
            name_1 = ANNOTATORS[all_annotators[first]]
            name_2 = ANNOTATORS[all_annotators[second]]
            pair_rows = score_matrix[[first, second]]
            print(name_1, name_2,
                  k.alpha(pair_rows, level_of_measurement="interval"))
def compare(et_coders, et_judgments, ht_coders, ht_judgments):
    """Return how much higher the agreement of job <et> is than that of <ht>.

    Both jobs are turned into reliability data with
    ``build_reliability_data`` and scored with ``krippendorff.alpha`` using
    its default level of measurement.

    :param et_coders: coders of the easier task
    :param et_judgments: labels assigned by the coders of the easier task
    :param ht_coders: coders of the harder task
    :param ht_judgments: labels assigned by the coders of the harder task
    :return: alpha(<et>) - alpha(<ht>)
    """
    easy_alpha = krippendorff.alpha(
        build_reliability_data(et_coders, et_judgments))
    hard_alpha = krippendorff.alpha(
        build_reliability_data(ht_coders, ht_judgments))
    return easy_alpha - hard_alpha
def getInterraterReliabilityBetweenGroups(group_list,df_annotations,shared_comments):
    """Return a symmetric matrix of nominal alphas between pairs of groups.

    ``df_annotations`` is pivoted to a ``rev_id`` x ``worker_id`` table of the
    ``attack`` column and restricted to the rows whose ``rev_id`` appears in
    ``shared_comments['rev_id']``. For every pair ``i < j`` the ratings of
    both groups are stacked and scored together; the result is stored at
    ``[i][j]`` and ``[j][i]``.

    Group 0 is the baseline: instead of ``group_list[0]`` it uses every worker
    column with at least one non-NaN rating.
    NOTE(review): the baseline therefore also contains the raters of group
    ``j``, whose rows end up in the data twice — confirm this is intended.

    The diagonal is never computed and is returned as NaN (it used to be
    uninitialised memory from ``np.empty``).
    """
    n_groups = len(group_list)
    alphas = np.full((n_groups, n_groups), np.nan)

    pivot = pd.pivot_table(df_annotations,
                           values='attack',
                           index=['rev_id'],
                           columns=['worker_id'],
                           aggfunc=np.sum)
    shared = pivot[pivot.index.isin(shared_comments['rev_id'])]

    def rows_for(raters):
        return [shared[int(rater)].tolist() for rater in raters]

    # Only i values that still have a partner j > i need any work.
    for i in range(n_groups - 1):
        if i == 0:
            # Baseline: every rater with at least one annotation.
            first_rows = [row for row in rows_for(shared.columns)
                          if np.any(~np.isnan(row))]
        else:
            first_rows = rows_for(group_list[i])

        for j in range(i + 1, n_groups):
            combined = first_rows + rows_for(group_list[j])
            alpha = krippendorff.alpha(reliability_data=combined,
                                       level_of_measurement='nominal')
            alphas[i][j] = alpha
            alphas[j][i] = alpha

    return alphas
def compute_alpha(item_annotations):
    """Return interval Krippendorff's alpha over staggered annotation batches.

    ``item_annotations`` is an iterable of batches; each batch is an iterable
    of per-annotator rating lists. Every row of a batch is shifted right by
    the length of the last row built so far (padding with NaN), and all rows
    are finally right-padded with NaN to the length of the longest row.
    """
    rows = []
    for batch in item_annotations:
        # The offset is fixed for the whole batch: it is the length of the
        # last row produced by the previous batches.
        offset = len(rows[-1]) if rows else 0
        leading = [np.nan] * offset
        rows.extend([leading + ratings for ratings in batch])

    width = max(map(len, rows))
    padded = [row + [np.nan] * (width - len(row)) for row in rows]

    return krippendorff.alpha(reliability_data=np.array(padded),
                              level_of_measurement='interval')
def alpha(self,
          ids=None,
          staff="upper",
          common_id=None,
          lib='nltk',
          label='bigram',
          distance=None):
    """Return Krippendorff's alpha for the annotations of a single staff.

    The data comes from ``self._staff_annotation_data``. With ``lib='nltk'``
    the value is computed by ``AnnotationTask.alpha()``; any other ``lib``
    delegates to the module-level ``alpha`` function, passing ``distance`` as
    ``level_of_measurement``.

    When ``distance`` is None it defaults to
    ``DScore.bigram_label_distance`` for ``label == "bigram"``, otherwise to
    ``binary_distance`` (nltk path) or ``'nominal'`` (other path).

    Raises:
        Exception: if ``staff`` is neither ``'upper'`` nor ``'lower'``.
    """
    if staff not in ('upper', 'lower'):
        raise Exception(
            "Alpha measure only applicable one staff at a time.")
    if ids is None:
        ids = []

    data = self._staff_annotation_data(ids=ids,
                                       staff=staff,
                                       lib=lib,
                                       label=label,
                                       common_id=common_id)

    if distance is None and label == "bigram":
        distance = DScore.bigram_label_distance

    if lib == 'nltk':
        metric = binary_distance if distance is None else distance
        return AnnotationTask(data=data, distance=metric).alpha()

    level = 'nominal' if distance is None else distance
    return alpha(reliability_data=data, level_of_measurement=level)
def krippendorff_alpha(self, window=1.):
    """Return nominal Krippendorff's alpha between ``self.t1p`` and ``self.t2p``.

    Both objects are sampled with ``attime(t)`` for
    ``t = self.start, self.start + window, ...`` while ``t < self.end``.
    A sample equal to ``MISSINGDATA`` becomes NaN; any other sample is
    replaced by its ``hash`` so it can be compared as a nominal value.
    """
    import krippendorff
    import numpy as np

    def encode(sample):
        return np.nan if sample == MISSINGDATA else hash(sample)

    first_track = []
    second_track = []
    t = self.start
    while t < self.end:
        first_track.append(encode(self.t1p.attime(t)))
        second_track.append(encode(self.t2p.attime(t)))
        t += window

    return krippendorff.alpha([first_track, second_track],
                              level_of_measurement='nominal')
def pypi_alpha_old(self,
                   ids=None,
                   staff="both",
                   common_id=None,
                   label='bigram',
                   distance=binary_distance):
    """Return nominal Krippendorff's alpha via the module-level ``alpha``.

    Reliability data comes from ``self._bigram_reliability_data`` when
    ``label == 'bigram'`` and from ``self._reliability_data`` otherwise. The
    value domain is fixed to the ten labels ``>1..>5`` and ``<1..<5``.

    Note that the default ``staff="both"`` is rejected by the check below, so
    callers must pass ``'upper'`` or ``'lower'`` explicitly. ``distance`` is
    accepted but not used.

    Raises:
        Exception: if ``staff`` is neither ``'upper'`` nor ``'lower'``.
    """
    if ids is None:
        ids = []
    if staff not in ('upper', 'lower'):
        raise Exception(
            "PyPI krippendorff alpha only applicable one staff at a time.")

    if label == 'bigram':
        build_data = self._bigram_reliability_data
    else:
        build_data = self._reliability_data
    data = build_data(ids=ids, staff=staff, common_id=common_id)

    value_domain = [
        '>1', '>2', '>3', '>4', '>5',
        '<1', '<2', '<3', '<4', '<5',
    ]
    return alpha(reliability_data=data,
                 level_of_measurement='nominal',
                 value_domain=value_domain)
def alpha_for_question(self, raters_to_exclude=None):
    """Return Krippendorff's alpha for this question.

    Reliability data is produced by ``self.to_reliability``; the value domain
    is the sorted values of ``self.values_map`` and the level of measurement
    is ``self.alpha_distance``.

    :param raters_to_exclude: raters to leave out, forwarded to
        ``to_reliability``. ``None`` (the default) means an empty set; a
        fresh set is created per call so the default is never shared
        between calls.
    """
    if raters_to_exclude is None:
        raters_to_exclude = set()
    reliability_data = self.to_reliability(
        raters_to_exclude=raters_to_exclude)
    value_domain = sorted(self.values_map.values())
    return alpha(reliability_data=reliability_data,
                 value_domain=value_domain,
                 level_of_measurement=self.alpha_distance)
def scoreAlpha(answerMatrix, distanceFunc):
    """Return the Krippendorff score of the data passed in.

    ``answerMatrix`` is handed to ``krippendorff.alpha`` as ``value_counts``.
    ``distanceFunc`` should be 'nominal', 'ordinal', 'interval', 'ratio' or a
    callable; it is used as the level of measurement.
    """
    return krippendorff.alpha(
        value_counts=answerMatrix,
        level_of_measurement=distanceFunc,
    )
def score(path):
    """Print ratio-level Krippendorff's alpha and the mean score of a CSV.

    The header-less, comma-separated file at ``path`` is loaded into a data
    frame whose transpose is used as reliability data. The mean of the
    per-row means is printed as "Mean score". Returns the loaded data frame.
    """
    frame = pd.read_csv(path, sep=',', header=None)
    mean_of_row_means = frame.mean(axis=1).mean()

    ratings = frame.transpose().values
    coefficient = krippendorff.alpha(ratings, level_of_measurement='ratio')

    print("Krippendorff's alpha coefficient:", round(coefficient, 3))
    print("Mean score:", round(mean_of_row_means, 3))
    return frame
def krippendorfs_alpha(upper_rh_advice, exercise_upper_gold):
    """Return interval alpha of the gold fingerings plus one advised fingering.

    ``upper_rh_advice`` is an iterable of fingering symbols; its first element
    is dropped and the rest are converted to ints. That row is scored together
    with the rows of ``exercise_upper_gold``.

    ``exercise_upper_gold`` is no longer modified: the previous
    append-then-pop left the extra row in the caller's list whenever
    ``alpha`` raised.
    """
    finger_ints = [int(finger) for finger in list(upper_rh_advice)[1:]]
    reliability_data = list(exercise_upper_gold) + [finger_ints]
    return alpha(reliability_data=reliability_data,
                 level_of_measurement='interval')
def validate(output, test_labels) -> Tuple[float, float, float]:
    """Return ``(accuracy, correlation, alpha)`` of predictions vs. labels.

    Both ``output`` and ``test_labels`` are passed through ``predict`` (the
    true labels too, to convert from N neurons to a single value). Accuracy
    is the share of equal pairs, correlation is the off-diagonal entry of
    ``np.corrcoef`` and alpha is interval-level Krippendorff's alpha of the
    two stacked rows.
    """
    predicted = list(predict(output))
    expected = predict(test_labels)

    hits = sum(guess == truth for guess, truth in zip(predicted, expected))
    accuracy = hits / len(predicted)

    correlation = np.corrcoef(predicted, expected)[1][0]
    agreement = krippendorff.alpha(np.vstack([predicted, expected]),
                                   level_of_measurement='interval')
    return accuracy, correlation, agreement
def main():
    """Print alpha for the Wikipedia sample, from ratings and from counts.

    The sample is scored twice with the nominal and the default metric: once
    from the per-coder rating rows ('*' marks a missing rating) and once from
    the equivalent units x values count matrix.
    """
    print("Example from http://en.wikipedia.org/wiki/Krippendorff's_Alpha")
    print()

    reliability_data_str = (
        "* * * * * 3 4 1 2 1 1 3 3 * 3",  # coder A
        "1 * 2 1 3 3 4 3 * * * * * * *",  # coder B
        "* * 2 1 3 4 4 * 2 1 1 3 3 * 4",  # coder C
    )
    print('\n'.join(reliability_data_str))
    print()

    def parse_coder(line):
        return [np.nan if token == '*' else int(token)
                for token in line.split()]

    reliability_data = [parse_coder(line) for line in reliability_data_str]

    print("Krippendorff's alpha for nominal metric: ",
          krippendorff.alpha(reliability_data=reliability_data,
                             level_of_measurement='nominal'))
    print("Krippendorff's alpha for interval metric: ",
          krippendorff.alpha(reliability_data=reliability_data))

    print()
    print()
    print("From value counts:")
    print()

    value_counts = np.array([
        [1, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 2, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 2, 0],
        [0, 0, 2, 1],
        [0, 0, 0, 3],
        [1, 0, 1, 0],
        [0, 2, 0, 0],
        [2, 0, 0, 0],
        [2, 0, 0, 0],
        [0, 0, 2, 0],
        [0, 0, 2, 0],
        [0, 0, 0, 0],
        [0, 0, 1, 1],
    ])
    print(value_counts)

    print("Krippendorff's alpha for nominal metric: ",
          krippendorff.alpha(value_counts=value_counts,
                             level_of_measurement='nominal'))
    print("Krippendorff's alpha for interval metric: ",
          krippendorff.alpha(value_counts=value_counts))
def calculate_agreement(main_annotator, second_annotator, attribute_type):
    """Print nominal Krippendorff's alpha for one attribute.

    The comparison table is built by ``get_perspectives_based_on_sample``
    when ``attribute_type == 'perspective'`` and by
    ``get_stance_based_on_sample`` otherwise. Its ``attribute_type`` column
    and its ``'annotator'`` column are used as the two rating rows.
    """
    if attribute_type == 'perspective':
        build_sample = get_perspectives_based_on_sample
    else:
        build_sample = get_stance_based_on_sample
    sample = build_sample(second_annotator, main_annotator)

    rating_rows = [sample[attribute_type], sample['annotator']]
    agreement = krippendorff.alpha(reliability_data=rating_rows,
                                   level_of_measurement='nominal')
    print(attribute_type, ': ', agreement)
def krippendorffAlpha(self, listOfXLSXFiles=None, mode="mweAgreement"):
    """Return ``[(file, alpha), ..., ("All", alpha)]`` for the given files.

    Each file is read with the ``KrippendorffMeasure`` reader that matches
    ``mode`` ("mweAgreement", "semAgreement" or "freeAdjAgreement"), turned
    into a reliability matrix and scored with the nominal metric. The values
    of all files are then pooled and scored once more under the key "All".

    NOTE(review): the pooled score uses 'interval' while the per-file scores
    use 'nominal' — confirm whether this difference is intended.

    :param listOfXLSXFiles: paths of the files to score; ``None`` (default)
        means no files.
    :param mode: which reader to use.
    :raises ValueError: if ``mode`` is unknown and there is a file to read
        (previously this printed a message and then failed on an unbound
        variable).
    """
    s = KrippendorffMeasure
    reader_names = {
        "mweAgreement": "readXLSXFile_mweAgreement",
        "semAgreement": "readXLSXFile_semAgreement",
        "freeAdjAgreement": "readXLSXFile_freeAdjAgreement",
    }
    if listOfXLSXFiles is None:
        listOfXLSXFiles = []

    annotator_values_inWholeDataset = []
    k_list = []
    for file in listOfXLSXFiles:
        if mode not in reader_names:
            print("Error: unknown mode")
            raise ValueError("unknown mode: %r" % (mode,))
        read_file = getattr(s, reader_names[mode])
        annotator_values = read_file(self, xlsx_filename=file)

        reliability_matrix = s.krippendorffReliabilityMatrix(
            self, annotator_values)
        a = krippendorff.alpha(reliability_data=reliability_matrix,
                               value_counts=None,
                               level_of_measurement='nominal')
        k_list.append((file, a))
        # Extend in place instead of rebuilding the pooled list every time.
        annotator_values_inWholeDataset.extend(annotator_values)

    reliability_matrix_inWholeDataset = s.krippendorffReliabilityMatrix(
        self, annotator_values_inWholeDataset)
    a = krippendorff.alpha(
        reliability_data=reliability_matrix_inWholeDataset,
        value_counts=None,
        level_of_measurement='interval')
    k_list.append(("All", a))
    return k_list
def __init__(self, separator=': ', **kwargs):
    """Compute and store every intermediate result of an alpha calculation.

    Expects ``kwargs['data']`` (converted to a numpy array) and
    ``kwargs['args']`` (unpacked into ``krippendorff.Difference``). The
    distinct values of the data that are equal to themselves (which drops
    NaN) are numbered to build ``codebook`` and its inverse; these feed the
    coincidence matrix, the delta matrix, the observed and expected
    agreement and finally the alpha score.
    """
    self.target = kwargs
    self.data = np.array(kwargs['data'])
    self.difference = krippendorff.Difference(*kwargs['args'])
    self.separator = separator
    self.labels = (
        'Data',
        'Data type',
        'Difference method',
        'Observed agreement',
        'Expected agreement',
        'Alpha score',
    )

    # ``v == v`` is False only for values such as NaN.
    self.values = {v for v in self.data.flatten() if v == v}
    self.inverse_codebook = dict(enumerate(self.values))
    self.codebook = {
        value: code for code, value in self.inverse_codebook.items()
    }

    self.cm = krippendorff.get_coincidence_matrix(self.data, self.codebook)
    self.d = krippendorff.delta(self.cm, self.inverse_codebook,
                                self.difference)
    self.observed = krippendorff.observation(self.cm, self.d)
    self.expected = krippendorff.expectation(self.cm, self.d)
    self.alpha = krippendorff.alpha(self.data, self.difference)
def print_alpha_for_question(self, raters_to_exclude=None):
    """Print Krippendorff's alpha and summary figures for this question.

    Reliability data is produced by ``self.to_reliability``; the value domain
    is the sorted values of ``self.values_map`` and the level of measurement
    is ``self.alpha_distance``. The printed summary contains the label, the
    question text, the number of units (second axis of the reliability
    data), the maximum number of raters (first axis) and the result of
    ``calc_pairable_values``.

    :param raters_to_exclude: raters to leave out, forwarded to
        ``to_reliability``. ``None`` (the default) means an empty set; a
        fresh set is created per call so the default is never shared
        between calls.
    """
    if raters_to_exclude is None:
        raters_to_exclude = set()
    reliability_data = self.to_reliability(
        raters_to_exclude=raters_to_exclude)
    value_domain = sorted(self.values_map.values())
    pairable_values = calc_pairable_values(reliability_data, value_domain)
    k_alpha = alpha(reliability_data=reliability_data,
                    value_domain=value_domain,
                    level_of_measurement=self.alpha_distance)

    maximum_raters = reliability_data.shape[0]
    total_units = reliability_data.shape[1]

    print("----{}".format(self.label))
    print("{}".format(self.question_text))
    print("Units: {} Max raters: {} Pairable values: {}".format(
        total_units, maximum_raters, pairable_values))
    print(
        "Krippendorff alpha for '{}' is {:.3f} Alpha distance: {} Value domain: {}"
        .format(self.label, k_alpha, self.alpha_distance, value_domain))
def print_alpha_for_topic(topic_name, rows, maximum_raters, cumulative_length,
                          virtual_corpus_positions):
    """Print nominal Krippendorff's alpha for the annotations of one topic.

    A ``maximum_raters`` x ``cumulative_length`` float matrix is filled with
    NaN. For every row yielded by ``output_generator`` the slice
    ``start_pos:end_pos`` of the rater ``user_sequence_id`` is set to the
    row's ``topic_number``; later rows overwrite earlier ones.
    """
    ratings = np.full((maximum_raters, cumulative_length), np.nan,
                      dtype=float)

    for _, entry in output_generator(rows, virtual_corpus_positions):
        start = entry['start_pos']
        stop = entry['end_pos']
        rater = entry['user_sequence_id']
        topic = entry['topic_number']
        ratings[rater][start:stop] = float(topic)

    k_alpha = alpha(reliability_data=ratings, level_of_measurement='nominal')
    print("Krippendorff alpha is {:.3f} for '{}'".format(k_alpha, topic_name))
def krippen_alpha(df, metric, lom):
    """
    Given a dataframe, a column in that dataframe, and a level of
    measurement, compute Krippendorff's Alpha for that column and level of
    measurement.

    A value-count matrix is built with one row per unique ``questionId`` and
    one column per unique value of ``df[metric]``; each cell holds how often
    that value was assigned to that question.
    """
    metric_vals = np.unique(df[metric])
    uniq_ids = np.unique(df['questionId'])

    def column_of(value):
        # Position of ``value`` inside the sorted unique metric values.
        return np.nonzero(metric_vals == value)[0][0]

    value_counts = np.zeros((uniq_ids.shape[0], metric_vals.shape[0]),
                            dtype=float)

    # NOTE: the loop variable must stay named ``q_id``; the query string
    # below refers to it through ``@q_id``.
    for row, q_id in enumerate(uniq_ids):
        assigned = df.query('questionId==@q_id')[metric].to_numpy()
        vals, counts = np.unique(assigned, return_counts=True)
        columns = np.array([column_of(val) for val in vals])
        value_counts[row][columns] += counts

    return krippendorff.alpha(value_counts=value_counts,
                              value_domain=metric_vals.tolist(),
                              level_of_measurement=lom)
def get_krippendorffs_alpha(evals_matrix):
    """Compute Krippendorff's alpha over all annotations made by the
    pathologists.

    This statistic can work with missing data, so missing values need no
    special treatment. Because the classification is of the form
    (0, 1, 2, 3) and there is a hierarchy among those values, an ordinal
    metric is used.

    Args:
        - evals_matrix: np.array(), matrix with the evaluations made by the
            pathologists. It has shape (N, M), where N is the number of
            samples and M is the number of annotators. The matrix must be of
            float type and missing data, if any, must be encoded as np.nan.

    Returns:
        dict[str: float], with a single key, krippendorff_alpha, mapped to
        the computed Krippendorff's alpha.
    """
    # The matrix is transposed before scoring.
    ratings_by_annotator = evals_matrix.transpose()
    score = krippendorff.alpha(ratings_by_annotator,
                               level_of_measurement="ordinal")
    return {"krippendorff_alpha": score}
def _strip_csv_suffix(name):
    """Return ``name`` without a trailing '.csv' (unchanged otherwise)."""
    suffix = '.csv'
    return name[:-len(suffix)] if name.endswith(suffix) else name


def _append_csv(frame, path):
    """Append ``frame`` to ``path``, writing the header only for a new file."""
    frame.to_csv(path, mode='a', header=not os.path.exists(path))


def cohkap(pdfilename, grtfilename, resultfilename, y=0):
    """Score one prediction file against every rater and append the results.

    The 'image'/'label' columns of ``pdfilename`` are inner-merged on 'image'
    with the ground-truth file ``grtfilename`` (first column 'image', one
    column per rater). For every rater the quadratic-weighted Cohen's kappa
    and the weighted F1 between the predicted labels and that rater are
    computed. The results are appended as one row each to
    ``<resultfilename without .csv>_coh.csv`` and ``..._f1.csv``; the row is
    named after the prediction file.

    When ``y`` is truthy, Krippendorff's alpha of the raters is additionally
    written into the ``E4`` entry widget.
    NOTE(review): ``E4``, ``END`` and ``Entry`` are not defined in this block
    (presumably tkinter objects) — verify.

    The '.csv' suffix is now removed as a suffix. The previous
    ``str.strip('.csv')`` removed any of the characters '.', 'c', 's', 'v'
    from both ends and mangled names such as 'scores.csv'.
    """
    # Reading the respective csv files
    prediction_df = pd.read_csv(pdfilename)[['image', 'label']]
    ground_df = pd.read_csv(grtfilename)

    result_stem = _strip_csv_suffix(resultfilename)
    result_coh = result_stem + '_coh.csv'
    result_f1 = result_stem + '_f1.csv'

    # Performing inner merge
    mergedf_in = pd.merge(prediction_df, ground_df, on='image', how='inner')
    merged_list = mergedf_in.values[:, 1:].transpose().tolist()

    n_raters = len(ground_df.columns) - 1
    kap = np.zeros(n_raters)
    f1 = np.zeros(n_raters)

    # Cohen's kappa and F1 of the predictions against each rater. Column 1 of
    # the merged frame holds the predicted label, columns 2.. hold the raters.
    for i in range(n_raters):
        kap[i] = cohen_kappa_score(mergedf_in.iloc[:, 1:2],
                                   mergedf_in.iloc[:, i + 2:i + 3],
                                   weights='quadratic')
        f1[i] = f1_score(merged_list[0], merged_list[i + 1],
                         average='weighted')

    columns = mergedf_in.columns.tolist()
    columns.remove('image')
    columns.remove('label')

    run_name = _strip_csv_suffix(pdfilename.split('/')[-1])
    kap = pd.DataFrame(kap, index=[columns], columns=[run_name]).transpose()
    f1 = pd.DataFrame(f1, index=[columns], columns=[run_name]).transpose()

    if y:
        M = ground_df.values[:, 1:].transpose()
        E4.delete(0, END)
        Entry.insert(E4, 0, krippendorff.alpha(M.tolist()))

    # Writing Coh to csv
    _append_csv(kap, result_coh)
    # Writing F1 to csv
    _append_csv(f1, result_f1)
def get_agreement(self, on="HATE", users=None):
    """
    Get agreement

    Returns ``(alpha, support)``. The labels come from
    ``self.get_labelled_comments(on)``; when ``users`` is truthy only the
    rows ``df.loc[users]`` are kept. ``support`` is the number of columns
    that contain at least one value greater than 0. If it is 0 the agreement
    is NaN, otherwise it is ``krippendorff.alpha`` of the values cast to
    float.

    Arguments:
    ---------

    on: "string"
        Must be one of
            - hate
            - MUJER
            - LGBTI
            - RACISMO
            - POBREZA
            - DISCAPACIDAD
            - POLITICA
            - ASPECTO
            - CRIMINAL
            - OTROS
    """
    labels = self.get_labelled_comments(on)
    if users:
        labels = labels.loc[users]

    # Support: columns in which at least one selected row is positive.
    positives_per_column = (labels > 0).sum()
    support = (positives_per_column > 0).sum()

    if support == 0:
        return np.nan, support

    agreement = krippendorff.alpha(labels.values.astype('float'))
    return agreement, support
def getWeightKrippendorffMatrix(df,min_overlap=1):
    """Return ``(weight_matrix, overlaps)`` for all pairs of rows of ``df``.

    For every pair ``i < j`` only the positions rated by both rows (non-NaN
    in both) are kept. ``overlaps`` collects the size of that overlap for
    every pair. If the overlap is at least ``min_overlap``, nominal
    Krippendorff's alpha ``a`` of the two rows is computed and the symmetric
    weight ``0.5 + (a + 1) / 2`` is stored; otherwise, or when alpha is NaN
    or could not be computed, the weight is 0.

    NOTE(review): the number of workers is taken from axis 1 of the matrix
    while the workers themselves are read as rows (axis 0). This is only
    consistent for a square input — confirm the expected orientation of
    ``df``.
    """
    df_matrix = df.to_numpy()
    number_workers = np.size(df_matrix,1)

    # Empty weight matrix; pairs that are never scored keep weight 0.
    distance_matrix = np.zeros((number_workers,number_workers))
    list_over = []

    for i in tqdm(range(0,number_workers)):
        annotator_1 = df_matrix[i]
        rated_1 = ~np.isnan(annotator_1)
        for j in range(i+1,number_workers):
            annotator_2 = df_matrix[j]

            # Keep only the items both annotators rated.
            rated_by_both = rated_1 & ~np.isnan(annotator_2)
            annotator_1_cleaned = annotator_1[rated_by_both]
            annotator_2_cleaned = annotator_2[rated_by_both]

            len_overlap = len(annotator_2_cleaned)
            list_over.append(len_overlap)

            weight = 0
            if len_overlap >= min_overlap:
                try:
                    kd_value = krippendorff.alpha(
                        reliability_data=[annotator_1_cleaned,
                                          annotator_2_cleaned],
                        level_of_measurement='nominal')
                except RuntimeWarning:
                    # Only reachable when warnings are turned into errors.
                    print(annotator_1_cleaned)
                    print(annotator_2_cleaned)
                    # Do not fall through with an unbound value or with the
                    # alpha of the previous pair.
                    kd_value = np.nan
                # False for NaN, so undefined alphas keep weight 0.
                if kd_value < 2:
                    weight = 0.5+ (kd_value +1)/(2)

            distance_matrix[i,j] = weight
            distance_matrix[j,i] = weight

    return (distance_matrix,list_over)
def _encode_column(column, weights=None):
    """Return the ratings of one rater column as a list.

    Missing cells (NaN or blank strings) become ``np.nan``. When ``weights``
    is given, every other value is replaced by ``weights[value]``.
    """
    def encode(value):
        if pd.isna(value) or str(value).strip() == '':
            return np.nan
        return value if weights is None else weights[value]

    return [encode(value) for value in column]


def main():
    """Command-line entry point: print Krippendorff's alpha for a CSV file.

    Positional arguments are the level of measurement ('nominal' or
    'ordinal') and the data file; ``-w/--weights`` names the weights file,
    which is required for ordinal data. The first column of the data file is
    skipped; every other column is one row of the reliability data. For
    ordinal data each value is mapped through the weights file, a
    header-less file of ``category, weight`` pairs.

    Fixes over the previous version: the separator regex is a raw string,
    columns are selected by position with ``iloc`` instead of the deprecated
    positional ``Series[i]``, and empty cells (read as NaN by pandas) are
    treated as missing in ordinal mode instead of raising ``KeyError``.
    """
    parser = argparse.ArgumentParser(
        description='This program calculates '
        'Krippendorff\'s Alpha given nominal data or ordinal data.')
    parser.add_argument('level_of_measurement',
                        choices=['nominal', 'ordinal'],
                        nargs=1,
                        help='set level of measurement')
    parser.add_argument('data', nargs=1, help='set the data file')
    parser.add_argument('-w',
                        '--weights',
                        nargs=1,
                        help='set the weights file,'
                        ' required for ordinal data only')
    args = parser.parse_args()

    metric = args.level_of_measurement[0]
    data = args.data[0]
    if metric == 'ordinal' and args.weights is None:
        parser.error('Must provide a separate file for weights.')

    check_input(data)
    df = pd.read_csv(data)

    weights = None
    if metric == 'ordinal':
        check_input(args.weights[0])
        df1 = pd.read_csv(args.weights[0],
                          sep=r",\s",
                          header=None,
                          names=["c", "w"],
                          engine="python")
        weights = dict(zip(df1.c, df1.w))

    # Column 0 is skipped; each remaining column is one rater.
    reliability_data = [
        _encode_column(df.iloc[:, i], weights)
        for i in range(1, len(df.columns))
    ]

    print("Krippendorff's alpha for {} metric:".format(metric),
          krippendorff.alpha(reliability_data=reliability_data,
                             level_of_measurement=metric))
def krippendorff_metric_study(case_, additional_measures):
    """Print and return Krippendorff's alpha of delegate presence per round.

    Loads ``data/<case_>.csv`` and builds a rounds x delegates table that is
    1 where a delegate's name occurs in the rows of that round and 0
    elsewhere. The table is passed to ``ka.alpha`` as is.

    If ``additional_measures`` is truthy, kappa, Fleiss' multi-kappa and
    Scott's pi of the same table are printed via nltk's ``AnnotationTask``.

    NOTE(review): the rounds are taken from the 'timeunit' column but the
    rows of a round are selected by the 'year' column — confirm both columns
    hold the same values.

    Cells are now written with ``DataFrame.at``; the previous chained
    assignment ``data_new[name][round] = 1`` is not guaranteed to modify the
    frame and is a no-op under pandas copy-on-write.
    """
    data_raw = pd.read_csv("data/" + case_ + ".csv")
    rounds_ = data_raw['timeunit'].unique()
    delegates = data_raw['name'].unique()
    print("Loaded {rounds} sample rounds".format(rounds=len(rounds_)))
    print("Rounds contain {delegates} delegates".format(
        delegates=len(delegates)))

    data_new = pd.DataFrame(0, index=rounds_, columns=delegates)
    for round_ in rounds_:
        names_in_round = data_raw[data_raw['year'] == round_]['name']
        for name_ in names_in_round:
            data_new.at[round_, name_] = 1

    alpha = ka.alpha(data_new)
    print("k'aplpha " + str(alpha))

    # TODO: test this
    if additional_measures:
        data_new_nltk = [[round_, name_, data_new.at[round_, name_]]
                         for round_ in rounds_
                         for name_ in delegates]
        ratingtask = agreement.AnnotationTask(data=data_new_nltk)
        print("kappa " + str(ratingtask.kappa()))
        print("fleiss " + str(ratingtask.multi_kappa()))
        print("scotts " + str(ratingtask.pi()))

    return alpha
def buildKrippendorffMatrix(surveys):
    """Write the Krippendorff CSV for the given surveys.

    ``surveys`` maps a survey name to a table whose columns are the survey
    questions. The questions of the first survey are walked in order,
    starting at column number ``column`` and stopping at the first question
    that starts with 'researchfield'. For every kept question the answer of
    each participant in ``persIDdict`` is translated into a category ID and
    appended to that participant's line. The resulting matrix is scored with
    the interval (default) and the nominal metric, and everything is written
    to ``lsg__krippendorff__<resultName>.csv``.

    Uses the module-level names ``categoryList``, ``column``, ``persIDdict``
    and ``resultName``; the lines stored in ``persIDdict`` are extended in
    place.

    Raises:
        IndexError: if an answer's category cannot be evaluated.
        KeyError: if a question cannot be read from one of the surveys.
    """
    # dictionary containing the IDs for each category: key - category name
    categories = {}
    categoryIndex = 1
    for category in categoryList:
        categories[category[0].strip()] = categoryIndex
        categoryIndex = categoryIndex + 1
    categories["Other"] = categoryIndex
    categories["None"] = categoryIndex + 1

    # get the first input survey
    survey = surveys[list(surveys)[0]]

    # pieces of the first line of the Krippendorff CSV file
    firstTopParts = ["Rater"]
    # pieces of the second line of the Krippendorff CSV file
    secondTopParts = []
    # next-to-last line of the Krippendorff CSV file: category names
    firstBottom = "".join(name.strip() + "," for name in categories)
    # last line of the Krippendorff CSV file: category IDs
    secondBottom = "".join(str(code) + "," for code in categories.values())

    # number of term
    termNumber = 1
    # current column number
    currentColumn = 0

    # loop over each question and noun of the first input survey
    for term in survey:
        # a question starting with 'researchfield' marks the end of the
        # actual data in the file
        if term.lower().startswith("researchfield"):
            break

        # only parse columns at or after the user set column number
        if currentColumn >= column:
            # get the title name
            title = term.split(".")[0].split("[")[0]

            # ignore 'other' or comment sections. The counter is not
            # advanced here, which is harmless: it has already reached
            # ``column``.
            if title[-1] == "C" or title[-2:] == "CQ":
                continue

            # get the noun
            noun = term.split("].")[1].split("[")[1][:-1].replace(",", ";")
            firstTopParts.append(",<title>-Term" + str(termNumber))
            termNumber = termNumber + 1
            secondTopParts.append("," + title + "-" + noun)

            # loop over each participant ID
            for persID in persIDdict:
                try:
                    # loop over each survey of the survey dictionary
                    for surveyKey in surveys:
                        # get the list of participant IDs
                        persIDs = surveys[surveyKey]["id. Response ID"]

                        # skip surveys that do not contain the participant
                        if persID not in persIDs.tolist():
                            continue
                        surveyIDindex = persIDs[persIDs == persID].index[0]

                        # get the answer
                        answer = str(surveys[surveyKey][term][surveyIDindex])

                        if answer.startswith("<div>other"):
                            category = "Other"
                        elif answer == "nan":
                            # no answer was given
                            category = "None"
                        else:
                            category = answer.split("</span>")[0].split(
                                ">")[-1]

                        try:
                            # append the category ID to the participant line
                            persIDdict[persID] = persIDdict[
                                persID] + "," + str(categories[category])
                        except Exception as err:
                            raise IndexError(
                                "The program couldn't evaluate the category '"
                                + category +
                                "'. Please make sure that this category is contained in the 'categories' file."
                            ) from err
                except IndexError:
                    raise
                except Exception as err:
                    raise KeyError(
                        "The program couldn't find the question and term '" +
                        term + "' in the survey file '" + surveyKey +
                        "'. Please make sure that all files contain identical questions and terms."
                    ) from err

        # increase the current column number by 1
        currentColumn = currentColumn + 1

    firstTop = "".join(firstTopParts)
    secondTop = "".join(secondTopParts)
    summary = "".join(line + "\n" for line in persIDdict.values())

    # the first field of each line is skipped; the rest are category IDs
    krippendorff_matrix = [[int(i) for i in j.split(",")[1:]]
                           for j in summary.split("\n") if j]
    krippendorff = kp.alpha(reliability_data=krippendorff_matrix)
    krippendorff_nominal = kp.alpha(reliability_data=krippendorff_matrix,
                                    level_of_measurement='nominal')

    # writes the formatted results to the Krippendorff CSV file
    with open("lsg__krippendorff__" + resultName + ".csv", "w") as lsg:
        lsg.write(firstTop + "\n" + secondTop + "\n" + summary + "\n\n\n" +
                  firstBottom + "\n" + secondBottom +
                  "\n\nKrippendorff's alpha for interval metric," +
                  str(krippendorff) +
                  "\nKrippendorff's alpha for nominal metric," +
                  str(krippendorff_nominal))
valueCounts['offensive'][line['tweet_id']], desambigEntries) else: labelOffensive = "N/A" if labelHate == 1: labelHateType = None if line['tweet_id'] in valueCounts['hateTypes']: labelHateType = determineHateTypeLabel( line['tweet_id'], line['text'], valueCounts['hateTypes'][line['tweet_id']], desambigEntries) else: labelHateType = "N/A" else: labelHateType = "N/A" writer.writerow({ 'id': line['tweet_id'], 'text': line['text'], 'HS': str(labelHate), 'OF': str(labelOffensive), 'HT': labelHateType }) print("\nAmbiguous:", countAmbiguous) print( "\nKrippendorff:", krippendorff.alpha(value_counts=np.array(list( valueCounts['hate'].values())), level_of_measurement='nominal'))