def collate_individual_user_results():
    # A list to hold the resulting stats in
    project_by_project_results = []

    # Loop over the projects in the project_configuration above
    for project in project_configuration:
        # A list to hold this project's per-user stats
        individual_project_results = []

        # Get the basic details we need from the project
        project_short_name = project["project_short_name"]

        # Get a list of all potential include ids for this project
        include_ids = project["include_user_ids"]

        # Create a list of user_ids for this project that have not completed the required number of tasks
        exclude_id_based_on_task_count = create_list_of_users_not_completing_req_no_of_tasks(
            project_short_name, min_no_tasks=324
        )

        # Filter the include_ids, removing any that should be excluded based on task count
        include_ids = [id for id in include_ids if id not in exclude_id_based_on_task_count]

        # Create a list of user_ids to exclude based on marginal distribution
        exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(project_short_name)

        # Filter the include_ids, removing any that should be excluded based on marginal distributions
        include_ids = [id for id in include_ids if id not in exclude_id_based_on_marginal_distribution]

        # Select the correct gold standard data set and associated expert_ids for each project
        gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
        expert_ids = define_gold_standard_ids(project_short_name=project_short_name)

        # Build the combined dict using the filtered ids and relevant gold standard data
        combined_dict = build_combined_dict_keyed_on_composite_key(
            project_short_name=project_short_name,
            user_ids_to_include=include_ids,
            expert_project_short_name=gold_standard_data,
            expert_user_ids_to_include=expert_ids,
        )

        # Run the analysis on the combined dict
        create_user_agreement_pre_processing_dict(combined_dict)
        individual_user_results = create_user_agreement_dict(combined_dict)

        # Compile the project results object
        for user_id, results_dict in individual_user_results.items():
            project_results = {
                "project_name": project_short_name.split("-")[-1],
                "user_id": user_id,
                "accuracy": results_dict["accuracy_based_on_excluding_tied"],
                "sensitivity": results_dict["sensitivity_excluding_tied"],
                "specificity": results_dict["specificity_excluding_tied"],
                "precision": results_dict["precision_excluding_tied"],
                "f-measure": results_dict["f_measure_excluding_tied"],
                "kappa": results_dict["inter_rater_agreement_excluding_tied"],
            }
            individual_project_results.append(project_results)

        project_by_project_results.append(individual_project_results)

    return project_by_project_results
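The function returns one list per project, each holding one dict of agreement statistics per retained user. A minimal sketch of how those nested results might be flattened for inspection, assuming pandas is available; the DataFrame step and the sanity-check print are illustrative additions, not part of the pipeline above:

from itertools import chain

import pandas as pd

# Collect the nested per-project, per-user results
project_by_project_results = collate_individual_user_results()

# Flatten the list of per-project lists into one table with a row per user
results_df = pd.DataFrame(chain.from_iterable(project_by_project_results))

# Quick sanity check: how many retained users each project contributed
print(results_df.groupby("project_name")["user_id"].nunique())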
exclude_id_based_on_task_count = create_list_of_users_not_completing_req_no_of_tasks(
    project_short_name, min_no_tasks=360
)

# Filter the include_ids, removing any that should be excluded based on task count
include_ids = [id for id in include_ids if id not in exclude_id_based_on_task_count]

# Create a list of user_ids to exclude based on marginal distribution
exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(project_short_name)

# Filter the include_ids, removing any that should be excluded based on marginal distributions
include_ids = [id for id in include_ids if id not in exclude_id_based_on_marginal_distribution]

# Select the appropriate gold standard data and corresponding expert_ids for the project
gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
expert_ids = define_gold_standard_ids(project_short_name=project_short_name)

# Build the complete dictionary using all of the above
combined_dict = build_combined_dict_keyed_on_composite_key(
    project_short_name=project_short_name,
    user_ids_to_include=include_ids,
    expert_project_short_name=gold_standard_data,
    expert_user_ids_to_include=expert_ids,
)

# Create list of users
list_of_users = create_list_of_users(filtered_dict=combined_dict)

# Create list of number of citizen scientists
no_citizen_scientists = [10, 15]
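The last two variables suggest the next step draws random groups of 10 and 15 citizen scientists from list_of_users. The loop below is a hypothetical sketch of that sampling step only; random.sample, n_repeats, and sample_user_ids are assumptions and do not come from the original pipeline:

import random

n_repeats = 100  # assumed number of random draws per group size

for group_size in no_citizen_scientists:
    for _ in range(n_repeats):
        # Draw a random group of citizen scientists from the filtered user list;
        # the sampled ids would then be scored against the gold standard data
        sample_user_ids = random.sample(list_of_users, group_size)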