def _compute_language_pair_stats():
    """
    Builds one row of HIT statistics per entry in LANGUAGE_PAIR_CHOICES.

    Each row is a tuple of the form:

        (name,
         number of unique systems seen in completed HITs,
         (remaining HITs, remaining percentage),
         (completed HITs, completed percentage))

    Percentages are relative to remaining + completed HITs; a total of
    zero is replaced by one so the division never fails.
    """
    stats = []

    # TODO: move LANGUAGE_PAIR_CHOICES to a better place.
    for pair in LANGUAGE_PAIR_CHOICES:
        pair_code = pair[0]
        pair_name = pair[1]

        # Running compute_remaining_hits() will also update completion
        # status for HITs, so it has to happen before the completed HITs
        # are queried below.
        remaining = HIT.compute_remaining_hits(language_pair=pair_code)
        completed_qs = HIT.objects.filter(
            completed=True, mturk_only=False, language_pair=pair_code)

        # Collect every system id mentioned by any translation attached to
        # a ranking result of a completed HIT.  The 'system' attribute may
        # hold several comma-separated ids.
        systems = set()
        for hit in completed_qs:
            for result in RankingResult.objects.filter(item__hit=hit):
                for translation in result.item.translations:
                    systems.update(translation[1]['system'].split(','))

        completed = completed_qs.count()
        denominator = float((remaining + completed) or 1)

        stats.append((
            pair_name,
            len(systems),
            (remaining, 100 * remaining / denominator),
            (completed, 100 * completed / denominator),
        ))

    return stats
def _compute_global_stats():
    """
    Computes some global statistics for the WMT15 evaluation campaign.

    Side effect: every active, non-MTurk HIT which has at least one user
    but is not yet flagged as completed is marked completed and saved.

    Returns a list of (label, value) tuples in display order.
    """
    wmt15_group = Group.objects.filter(name='WMT15')
    wmt15_users = []
    if wmt15_group.exists():
        wmt15_users = wmt15_group[0].user_set.all()

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(
        mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    pending_hits = HIT.objects.filter(
        active=True, mturk_only=False, completed=False)
    for hit in pending_hits:
        if hit.users.count() >= 1:
            hits_completed += 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
        item__hit__completed=True, item__hit__mturk_only=False)

    # Every ranking of n systems yields "n choose 2" pairwise comparisons.
    # The closed form n * (n - 1) // 2 keeps the total an exact integer
    # (true division would turn it into a float and break the thousands
    # formatting below) and correctly counts one comparison for n == 2.
    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        num_systems = result.systems
        if num_systems >= 2:
            system_comparisons += num_systems * (num_systems - 1) // 2

    # Aggregate information about participating groups, skipping the
    # campaign group itself and the language pair groups.
    groups = set()
    for user in wmt15_users:
        for group in user.groups.all():
            name = group.name
            if (name == 'WMT15' or name.startswith('eng2')
                    or name.endswith('2eng')):
                continue

            groups.add(group)

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum(datetime_to_seconds(x) for x in durations)
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the divisor still assumes three annotators per HIT,
    # although a single annotation now completes a HIT -- confirm intent.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    return [
        ('Users', len(wmt15_users)),
        ('Groups', len(groups)),
        ('HITs completed', '{0:,}'.format(hits_completed)),
        ('HITs remaining', '{0:,}'.format(hits_remaining)),
        ('Ranking results', '{0:,}'.format(ranking_results.count())),
        ('System comparisons', '{0:,}'.format(system_comparisons)),
        ('Average duration (per HIT)', seconds_to_timedelta(avg_time)),
        ('Average duration (per task)', seconds_to_timedelta(avg_user_time)),
        ('Total duration', seconds_to_timedelta(total_time)),
    ]