# Allocate the AUC matrix: one row per iteration, one column per entry of
# no_citizen_scientists. Nothing in this section writes to it.
auc_array = np.zeros([no_iterations, len(no_citizen_scientists)])

# Walk every sample of citizen scientists and record how well their consensus
# agrees with the expert gold standard. In each result matrix the row is the
# position of the sample inside its outer group and the column is the position
# of that outer group inside random_samples.
for subject_index, outer_sample in enumerate(random_samples):
    for sample_index, inner_sample in enumerate(outer_sample):
        # Build the combined dictionary restricted to the users in this
        # sample plus the experts.
        dict_with_fixed_no_users = build_combined_dict_keyed_on_composite_key(
            project_short_name=project_short_name,
            user_ids_to_include=inner_sample,
            expert_project_short_name=gold_standard_data,
            expert_user_ids_to_include=expert_ids,
        )

        (
            accuracy,
            sensitivity,
            specificity,
            f_measure,
            kappa,
        ) = calculate_expert_citizen_consensus_agreement(dict_with_fixed_no_users)

        # Precision is not among the values unpacked above, so no precision
        # matrix is filled here.
        accuracy_array[sample_index, subject_index] = accuracy
        sensitivity_array[sample_index, subject_index] = sensitivity
        specificity_array[sample_index, subject_index] = specificity
        f_measure_array[sample_index, subject_index] = f_measure
        kappa_array[sample_index, subject_index] = kappa

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("Saving statistical measures matrices...")
np.save("accuracy_matrix_2D_nouw4b.npy", accuracy_array)
# Loop over the projects in the project_configuration above.
for project in project_configuration:
    # Get the basic details we need from the project.
    project_short_name = project["project_short_name"]
    project_name = project["name"]

    # Get a list of all potential include ids for this project.
    include_ids = project["include_user_ids"]

    # Create a list of user_ids for this project that have not completed the
    # required number of tasks.
    exclude_id_based_on_task_count = create_list_of_users_not_completing_req_no_of_tasks(
        project_short_name, min_no_tasks=324
    )

    # Filter the include_ids, removing any that should be excluded based on
    # task count. The loop variable is deliberately not called `id`: that
    # would shadow the builtin, and on Python 2 a list-comprehension variable
    # leaks into the enclosing scope.
    include_ids = [
        user_id
        for user_id in include_ids
        if user_id not in exclude_id_based_on_task_count
    ]

    # Create a list of user_ids to exclude based on marginal distribution.
    exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(
        project_short_name
    )

    # Filter the include_ids, removing any that should be excluded based on
    # marginal distributions.
    include_ids = [
        user_id
        for user_id in include_ids
        if user_id not in exclude_id_based_on_marginal_distribution
    ]

    # Look up the gold-standard (expert) data source and the expert ids for
    # this project.
    gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
    expert_ids = define_gold_standard_ids(project_short_name=project_short_name)

    # Combine the remaining citizen-scientist ids with the expert ids into a
    # single dictionary.
    combined_dict = build_combined_dict_keyed_on_composite_key(
        project_short_name=project_short_name,
        user_ids_to_include=include_ids,
        expert_project_short_name=gold_standard_data,
        expert_user_ids_to_include=expert_ids,
    )

    # Run the agreement analyses on the combined dictionary. Their return
    # values are not captured here.
    calculate_expert_inter_rater_agreement(combined_dict, expert_ids=expert_ids)
    create_user_expert_consensus_agreement_pre_processing_dict(combined_dict)
    calculate_expert_citizen_consensus_agreement(combined_dict)