# Create a list of user_ids to exclude based on marginal distribution
            exclude_id_based_on_marginal_distribution = calculate_marginal_distribution_for_each_user(project_short_name)

            # Filter include_ids, dropping every id that was flagged for exclusion
            # above. The loop variable is deliberately not named ``id`` so the
            # builtin ``id()`` is not shadowed.
            include_ids = [candidate_id for candidate_id in include_ids if candidate_id not in exclude_id_based_on_marginal_distribution]

            # Looked up per project; this value is what gets passed as
            # ``project_short_name`` when the gold-standard dict is built below.
            gold_standard_data = define_gold_standard_data(project_short_name=project_short_name)
            # Looked up per project; passed as ``user_ids_to_include`` for the
            # gold-standard dict (presumably the expert annotators' ids -- confirm).
            expert_ids = define_gold_standard_ids(project_short_name=project_short_name)
            #combined_dict = build_combined_dict_keyed_on_composite_key(project_short_name=project_short_name, user_ids_to_include=include_ids, expert_project_short_name=gold_standard_data, expert_user_ids_to_include=expert_ids)

### CLEAN FROM HERE ###

            # Answers of the included users for this project; indexed by key and
            # read as user_dict[key]['ihc'] further down.
            user_dict = create_individual_dict(project_short_name=project_short_name,user_ids_to_include=include_ids)

            # Gold-standard answers, read the same way as user_dict further down.
            if project_short_name == 'tb2-r2.0':
                # Bind the loader's result: it used to be discarded, which left
                # gs_dict unbound (or stale from earlier code) for this project.
                # NOTE(review): assumes get_gs_data_2r20() returns the gold-standard
                # dict in the same layout create_individual_dict produces -- confirm.
                gs_dict = ihc.get_gs_data_2r20()
            else:
                gs_dict = create_individual_dict(project_short_name=gold_standard_data, user_ids_to_include=expert_ids)

            # Sorted keys that appear in both the gold-standard and the user data and
            # contain ':0:0' (per the original note: this converts data stored by
            # keys back to data stored by task/image).
            list_of_composite_keys_in_both = sorted(
                composite_key
                for composite_key in gs_dict.keys()
                if (composite_key in user_dict.keys()) and (':0:0' in composite_key)
            )

            for key in list_of_composite_keys_in_both:
                # One DataFrame per source, built from the 'ihc' entry stored under
                # this key (presumably one column per individual answer -- confirm).
                user_answers = pandas.DataFrame(user_dict[key]['ihc'])
                gs_answers = pandas.DataFrame(gs_dict[key]['ihc'])

                # sum(axis=1) totals each row across all columns; the total of the
                # 'proportion' row is then divided by the number of columns
                # (DataFrame.keys() yields the column labels), i.e. the mean
                # 'proportion' over the columns.
                # NOTE(review): sum() skips NaN by default while the divisor counts
                # every column -- confirm no missing values are expected here.
                user_prop_mean = user_answers.sum(axis=1)['proportion']/len(user_answers.keys())
                expert_prop_mean = gs_answers.sum(axis=1)['proportion']/len(gs_answers.keys())