def _compute_user_stats():
    """
    Computes user statistics for the wmt16 evaluation campaign.

    Returns a list of (username, completed HITs, average time, total time)
    tuples for all members of the 'wmt16' group who have completed at least
    one HIT, ordered by number of completed HITs, highest first.  The list
    is empty if the 'wmt16' group does not exist.

    """
    user_stats = []

    wmt16_group = Group.objects.filter(name='wmt16')
    wmt16_users = []
    if wmt16_group.exists():
        wmt16_users = wmt16_group[0].user_set.all()

    for user in wmt16_users:
        _user_stats = HIT.compute_status_for_user(user)
        _completed = _user_stats[0]

        # Only report users who completed at least one HIT.  The test has to
        # look at the HIT count: element 0 of the result tuple is the
        # username, and comparing that against 0 never filters anything on
        # Python 2 and raises TypeError on Python 3.
        if not _completed > 0:
            continue

        # Elements 1 and 2 are used as average and total time in seconds.
        _avg_time = seconds_to_timedelta(_user_stats[1])
        _total_time = seconds_to_timedelta(_user_stats[2])
        user_stats.append((user.username, _completed, _avg_time, _total_time))

    # Sort by total number of completed HITs.
    user_stats.sort(key=lambda x: x[1])
    user_stats.reverse()

    return user_stats
def _compute_group_stats():
    """
    Computes group statistics for the WMT16 evaluation campaign.

    Returns a list of (group name, (completed, required, delta)) tuples for
    every participating group with at least one completed HIT, sorted by
    ascending delta (completed minus required), followed by a final
    ("Totals", (completed, required, delta)) row summing the listed groups.

    """
    group_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # Aggregate information about participating groups.
    groups = set()
    for user in wmt16_users:
        groups.update(_identify_groups_for_user(user))

    # TODO: move this to property of evaluation group or add dedicated data model.
    # GOAL: should be configurable from within the Django admin backend.
    #
    # MINIMAL: move to local_settings.py?
    #
    # GROUP_HIT_REQUIREMENTS defines the number of HITs each group should
    # have completed during the WMT16 evaluation campaign.
    for group in groups:
        _name = group.name
        _group_stats = HIT.compute_status_for_group(group)
        _total = _group_stats[0]

        # Groups without completed HITs are not reported.
        if not _total > 0:
            continue

        # Groups without an explicit requirement are volunteers: 0 required.
        # Resolving this unconditionally ensures _required is always bound
        # and never carries over from a previous loop iteration.
        if _name in GROUP_HIT_REQUIREMENTS:
            _required = GROUP_HIT_REQUIREMENTS[_name]
        else:
            _required = 0

        _delta = _total - _required
        group_stats.append((_name, (_total, _required, _delta)))

    # Sort by number of remaining HITs.
    group_stats.sort(key=lambda x: x[1][2])

    # Add totals at the bottom.
    global_total = sum([x[1][0] for x in group_stats])
    global_required = sum([x[1][1] for x in group_stats])
    global_delta = global_total - global_required
    global_data = (global_total, global_required, global_delta)
    group_stats.append(("Totals", global_data))

    return group_stats
def _compute_language_pair_stats():
    """
    Computes HIT statistics per language pair.

    Returns one tuple per entry in LANGUAGE_PAIR_CHOICES:
    (display name, number of distinct systems seen in completed HITs,
    (remaining HITs, remaining percentage),
    (completed HITs, completed percentage)).

    """
    stats = []

    # TODO: move LANGUAGE_PAIR_CHOICES better place.
    #
    # Running compute_remaining_hits() will also update completion status for HITs.
    for choice in LANGUAGE_PAIR_CHOICES:
        pair_code, pair_name = choice[0], choice[1]

        remaining = HIT.compute_remaining_hits(language_pair=pair_code)
        completed_hits = HIT.objects.filter(
          completed=True, mturk_only=False, language_pair=pair_code)

        # A translation may list several comma-separated system names.
        systems = set()
        for hit in completed_hits:
            for result in RankingResult.objects.filter(item__hit=hit):
                for translation in result.item.translations:
                    systems.update(translation[1]['system'].split(','))

        LOGGER.info(systems)

        completed = completed_hits.count()
        total = remaining + completed

        # Guard against division by zero for pairs without any HITs.
        denominator = float(total or 1)
        stats.append((
          pair_name,
          len(systems),
          (remaining, 100 * remaining / denominator),
          (completed, 100 * completed / denominator),
        ))

    return stats
def _compute_language_pair_stats():
    """
    Computes HIT statistics per language pair.

    For every (code, name) entry in LANGUAGE_PAIR_CHOICES this yields a
    tuple of the pair's name, the number of distinct systems found in the
    ranking results of its completed HITs, and two (count, percentage)
    pairs for remaining and completed HITs respectively.

    """
    results_per_pair = []

    # TODO: move LANGUAGE_PAIR_CHOICES better place.
    #
    # Running compute_remaining_hits() will also update completion status for HITs.
    for choice in LANGUAGE_PAIR_CHOICES:
        code = choice[0]
        label = choice[1]

        hits_left = HIT.compute_remaining_hits(language_pair=code)
        done_queryset = HIT.objects.filter(completed=True,
                                           mturk_only=False,
                                           language_pair=code)

        # Collect system names; one translation entry can carry several
        # names joined by commas.
        seen_systems = set()
        for done_hit in done_queryset:
            hit_results = RankingResult.objects.filter(item__hit=done_hit)
            for ranking in hit_results:
                for entry in ranking.item.translations:
                    for system_name in entry[1]['system'].split(','):
                        seen_systems.add(system_name)

        LOGGER.info(seen_systems)

        hits_done = done_queryset.count()

        # Use 1 as divisor when there are no HITs at all for this pair.
        divisor = float((hits_left + hits_done) or 1)
        left_info = (hits_left, 100 * hits_left / divisor)
        done_info = (hits_done, 100 * hits_done / divisor)

        results_per_pair.append(
          (label, len(seen_systems), left_info, done_info))

    return results_per_pair
def _compute_user_stats():
    """
    Computes user statistics for the WMT16 evaluation campaign.

    Returns a list of (username, completed HITs, average time, total time)
    tuples for all active users of the 'WMT16' group who have completed at
    least one HIT, ordered by number of completed HITs, highest first.

    """
    user_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    for user in wmt16_users:
        _user_stats = HIT.compute_status_for_user(user)
        _completed = _user_stats[0]

        # Only report users who completed at least one HIT.  The test has to
        # look at the HIT count: element 0 of the result tuple is the
        # username, and comparing that against 0 never filters anything on
        # Python 2 and raises TypeError on Python 3.
        if not _completed > 0:
            continue

        # Elements 1 and 2 are used as average and total time in seconds.
        _avg_time = seconds_to_timedelta(_user_stats[1])
        _total_time = seconds_to_timedelta(_user_stats[2])
        user_stats.append((user.username, _completed, _avg_time, _total_time))

    # Sort by total number of completed HITs.
    user_stats.sort(key=lambda x: x[1])
    user_stats.reverse()

    return user_stats
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.

    Side effects: marks active, non-MTurk HITs which have at least one
    annotator as completed, and stores a snapshot of the computed values
    via TimedKeyValueData.update_status_if_changed().

    Returns a list of (label, value) tuples in display order.

    """
    global_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True, mturk_only=False, completed=False):
        if hit.users.count() >= 1:
            hits_completed = hits_completed + 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        #
        # Number of unordered system pairs, n * (n - 1) / 2.  Floor division
        # keeps the counter an integer on Python 3 as well, which the
        # '{0:,}' formatting below relies on.
        #
        # NOTE(review): results with exactly two systems contribute 0 here
        # although they contain one pair -- confirm whether `>= 2` is meant.
        if result.systems > 2:
            system_comparisons += result.systems * (result.systems - 1) // 2

    # Aggregate information about participating groups.
    groups = set()
    for user in wmt16_users:
        groups.update(_identify_groups_for_user(user))

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the factor 3 presumably stems from the former requirement
    # of three annotators per HIT -- confirm it is still intended.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    # Values shared by the returned statistics and the stored snapshot.
    users_count = len(wmt16_users)
    groups_count = len(groups)
    results_count = ranking_results.count()
    duration_per_hit = seconds_to_timedelta(avg_time)
    duration_per_task = seconds_to_timedelta(avg_user_time)
    duration_total = seconds_to_timedelta(total_time)

    global_stats.append(('Users', users_count))
    global_stats.append(('Groups', groups_count))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(results_count)))
    global_stats.append(('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', duration_per_hit))
    global_stats.append(('Average duration (per task)', duration_per_task))
    global_stats.append(('Total duration', duration_total))

    # Create new status data snapshot
    TimedKeyValueData.update_status_if_changed('users', str(users_count))
    TimedKeyValueData.update_status_if_changed('groups', str(groups_count))
    TimedKeyValueData.update_status_if_changed('hits_completed', str(hits_completed))
    TimedKeyValueData.update_status_if_changed('hits_remaining', str(hits_remaining))
    TimedKeyValueData.update_status_if_changed('ranking_results', str(results_count))
    TimedKeyValueData.update_status_if_changed('system_comparisons', str(system_comparisons))
    TimedKeyValueData.update_status_if_changed('duration_per_hit', str(duration_per_hit))
    TimedKeyValueData.update_status_if_changed('duration_per_task', str(duration_per_task))
    TimedKeyValueData.update_status_if_changed('duration_total', str(duration_total))

    return global_stats
def overview(request):
    """
    Renders the evaluation tasks overview.

    Collects, for every combination of the user's language pair groups and
    annotation projects, the next available HIT and the user's status, adds
    up totals, looks up the status of the user's annotation group and
    renders the 'wmt16/overview.html' template with this data.

    """
    LOGGER.info('Rendering WMT16 HIT overview for user "{0}".'.format(
      request.user.username or "Anonymous"))

    # Re-initialise random number generator.
    seed(None)

    # Collect available language pairs for the current user.
    language_codes = set([x[0] for x in LANGUAGE_PAIR_CHOICES])
    language_pairs = request.user.groups.filter(name__in=language_codes)

    # Collect available annotation projects for the current user.
    annotation_projects = request.user.project_set.all()

    hit_data = []
    total = [0, 0, 0]

    for language_pair in language_pairs:
        for annotation_project in annotation_projects:
            hit = _compute_next_task_for_user(request.user, annotation_project, language_pair)
            user_status = HIT.compute_status_for_user(request.user, annotation_project, language_pair)

            # Totals are accumulated on the raw values, before the
            # per-task status is converted for display below.
            for i in range(3):
                total[i] = total[i] + user_status[i]

            if hit:
                # Convert status seconds back into datetime.time instances.
                for i in range(2):
                    user_status[i+1] = seconds_to_timedelta(int(user_status[i+1]))

                hit_data.append(
                  (hit.get_language_pair_display(), hit.get_absolute_url(),
                   hit.hit_id, user_status, annotation_project)
                )

    # Convert total seconds back into datetime.timedelta instances.
    total[1] = seconds_to_timedelta(int(total[2]) / float(int(total[0]) or 1))

    # Remove microseconds to get a nicer timedelta rendering in templates.
    total[1] = total[1] - timedelta(microseconds=total[1].microseconds)

    total[2] = seconds_to_timedelta(int(total[2]))

    groups = _identify_groups_for_user(request.user)
    if len(groups) > 1:
        LOGGER.debug(u'User "{0}" assigned to multiple annotation groups: {1}'.format(
          request.user.username or u'Anonymous',
          u', '.join([x.name for x in groups]))
        )

    # Report on the first annotation group whenever the user has one.  The
    # selection must not depend on the "multiple groups" check above, or
    # users with exactly one group would never see their group status.
    group = None
    if len(groups) > 0:
        group = groups[0]

    if group is not None:
        group_name = group.name
        group_status = HIT.compute_status_for_group(group)
        for i in range(2):
            group_status[i+1] = seconds_to_timedelta(int(group_status[i+1]))

    else:
        group_status = None
        group_name = None

    LOGGER.debug(u'\n\nHIT data for user "{0}":\n\n{1}\n'.format(
      request.user.username or "Anonymous",
      u'\n'.join([u'{0}\t{1}\t{2}\t{3}'.format(*x) for x in hit_data])))

    # Compute admin URL for super users.
    admin_url = None
    if request.user.is_superuser:
        admin_url = reverse('admin:index')

    dictionary = {
      'active_page': "OVERVIEW",
      'hit_data': hit_data,
      'total': total,
      'group_name': group_name,
      'group_status': group_status,
      'admin_url': admin_url,
      'title': 'WMT16 Dashboard',
      'annotation_groups': [x.name for x in groups],
    }
    dictionary.update(BASE_CONTEXT)

    LOGGER.info(dictionary.values())

    return render(request, 'wmt16/overview.html', dictionary)
sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from django.contrib.auth.models import User, Group from appraise.wmt16.models import HIT, Project from appraise.wmt16.views import _identify_groups_for_user # Compute user statistics for all users. user_stats = [] wmt16 = Group.objects.get(name='WMT16') users = wmt16.user_set.all() # Iterate over all users and collect stats for all projects for user in users: for project in Project.objects.all(): _user_stats = HIT.compute_status_for_user(user, project) _name = user.username _email = user.email _project = project.name groups = _identify_groups_for_user(user) _group = "UNDEFINED" if len(groups) > 0: _group = ";".join([g.name for g in groups]) _data = (_name, _email, _project, _group, _user_stats[0], _user_stats[2]) if _data[-2] > 0: user_stats.append(_data) # Sort by research group.
def _compute_group_stats():
    """
    Computes group statistics for the wmt16 evaluation campaign.

    Only groups listed in the requirements table below and having at least
    one completed HIT are reported.  The result is a list of
    (group name, (completed, required, delta)) tuples sorted by ascending
    delta, terminated by a ("Totals", ...) row over the listed groups.

    """
    campaign_groups = Group.objects.filter(name='wmt16')
    members = []
    if campaign_groups.exists():
        members = campaign_groups[0].user_set.all()

    # Aggregate information about participating groups.  The campaign group
    # itself and the language pair groups ('eng2...', '...2eng') are skipped.
    research_groups = set()
    for member in members:
        for candidate in member.groups.all():
            candidate_name = candidate.name
            if not (candidate_name == 'wmt16'
                    or candidate_name.startswith('eng2')
                    or candidate_name.endswith('2eng')):
                research_groups.add(candidate)

    # TODO: move this to property of evaluation group or add dedicated data model.
    # GOAL: should be configurable from within the Django admin backend.
    #
    # MINIMAL: move to local_settings.py?
    #
    # The following dictionary defines the number of HITs each group should
    # have completed during the wmt16 evaluation campaign.
    group_hit_requirements = {
      # volunteers
      'MSR': 0,
      'MTMA': 0,

      # participants, confirmed
      'Aalto': 100,
      'Abu-Matran': 300,
      'AFRL-MITLL': 400,
      'AMU-UEDIN': 200,
      'CMU': 100,
      'CUNI': 500,
      'JHU': 1600,
      'KIT': 300,
      'KIT-LIMSI': 100,
      'LIMSI': 300,
      'LMU-CUNI': 100,
      'METAMIND': 100,
      'TBTK': 200,
      'Cambridge': 100,
      'NRC': 100,
      'NYU-Umontreal': 400,
      'PJATK': 200,
      'PROMT': 500,
      'QT21': 100,
      'RWTH': 100,
      'UEdin': 1900,
      'UH': 400,
      'USFD': 100,
      'UUT': 100,
      'YSDA': 200,
    }

    rows = []
    for research_group in research_groups:
        label = research_group.name
        if label not in group_hit_requirements:
            continue

        completed = HIT.compute_status_for_group(research_group)[0]
        required = group_hit_requirements[label]

        if completed > 0:
            rows.append((label, (completed, required, completed - required)))

    # Sort by number of remaining HITs.
    rows.sort(key=lambda row: row[1][2])

    # Add totals at the bottom.
    sum_completed = sum([row[1][0] for row in rows])
    sum_required = sum([row[1][1] for row in rows])
    rows.append(
      ("Totals", (sum_completed, sum_required, sum_completed - sum_required)))

    return rows
# Properly set DJANGO_SETTINGS_MODULE environment variable. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' PROJECT_HOME = os.path.normpath(os.getcwd() + "/..") sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from django.contrib.auth.models import User, Group from appraise.wmt16.models import HIT # Compute user statistics for all users. user_stats = [] wmt16 = Group.objects.get(name='WMT16') users = wmt16.user_set.all() for user in users: _user_stats = HIT.compute_status_for_user(user) _name = user.username _email = user.email _group = "UNDEFINED" for _g in user.groups.all(): if _g.name.startswith("eng2") \ or _g.name.endswith("2eng") \ or _g.name == "WMT16": continue _group = _g.name break _data = (_name, _email, _group, _user_stats[0], _user_stats[2]) user_stats.append(_data)
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.

    Side effects: marks active, non-MTurk HITs which have at least one
    annotator as completed, and stores a snapshot of the computed values
    via TimedKeyValueData.update_status_if_changed().

    Returns a list of (label, value) tuples in display order.

    """
    global_stats = []

    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False,
                                        completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True,
                                  mturk_only=False,
                                  completed=False):
        if hit.users.count() >= 1:
            hits_completed = hits_completed + 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(item__hit__completed=True,
                                                   item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        #
        # Number of unordered system pairs, n * (n - 1) / 2.  Floor division
        # keeps the counter an integer on Python 3 as well, which the
        # '{0:,}' formatting below relies on.
        #
        # NOTE(review): results with exactly two systems contribute 0 here
        # although they contain one pair -- confirm whether `>= 2` is meant.
        if result.systems > 2:
            system_comparisons += result.systems * (result.systems - 1) // 2

    # Aggregate information about participating groups.
    groups = set()
    for user in wmt16_users:
        groups.update(_identify_groups_for_user(user))

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the factor 3 presumably stems from the former requirement
    # of three annotators per HIT -- confirm it is still intended.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    # Values shared by the returned statistics and the stored snapshot.
    users_count = len(wmt16_users)
    groups_count = len(groups)
    results_count = ranking_results.count()
    duration_per_hit = seconds_to_timedelta(avg_time)
    duration_per_task = seconds_to_timedelta(avg_user_time)
    duration_total = seconds_to_timedelta(total_time)

    global_stats.append(('Users', users_count))
    global_stats.append(('Groups', groups_count))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(results_count)))
    global_stats.append(
        ('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', duration_per_hit))
    global_stats.append(('Average duration (per task)', duration_per_task))
    global_stats.append(('Total duration', duration_total))

    # Create new status data snapshot
    TimedKeyValueData.update_status_if_changed('users', str(users_count))
    TimedKeyValueData.update_status_if_changed('groups', str(groups_count))
    TimedKeyValueData.update_status_if_changed('hits_completed',
                                               str(hits_completed))
    TimedKeyValueData.update_status_if_changed('hits_remaining',
                                               str(hits_remaining))
    TimedKeyValueData.update_status_if_changed('ranking_results',
                                               str(results_count))
    TimedKeyValueData.update_status_if_changed('system_comparisons',
                                               str(system_comparisons))
    TimedKeyValueData.update_status_if_changed('duration_per_hit',
                                               str(duration_per_hit))
    TimedKeyValueData.update_status_if_changed('duration_per_task',
                                               str(duration_per_task))
    TimedKeyValueData.update_status_if_changed('duration_total',
                                               str(duration_total))

    return global_stats
def overview(request):
    """
    Renders the evaluation tasks overview.

    Collects, for every combination of the user's language pair groups and
    annotation projects, the next available HIT and the user's status, adds
    up totals, looks up the status of the user's annotation group and
    renders the 'wmt16/overview.html' template with this data.

    """
    LOGGER.info('Rendering WMT16 HIT overview for user "{0}".'.format(
        request.user.username or "Anonymous"))

    # Re-initialise random number generator.
    seed(None)

    # Collect available language pairs for the current user.
    language_codes = set([x[0] for x in LANGUAGE_PAIR_CHOICES])
    language_pairs = request.user.groups.filter(name__in=language_codes)

    # Collect available annotation projects for the current user.
    annotation_projects = request.user.project_set.all()

    hit_data = []
    total = [0, 0, 0]

    for language_pair in language_pairs:
        for annotation_project in annotation_projects:
            hit = _compute_next_task_for_user(request.user,
                                              annotation_project,
                                              language_pair)
            user_status = HIT.compute_status_for_user(request.user,
                                                      annotation_project,
                                                      language_pair)

            # Totals are accumulated on the raw values, before the
            # per-task status is converted for display below.
            for i in range(3):
                total[i] = total[i] + user_status[i]

            if hit:
                # Convert status seconds back into datetime.time instances.
                for i in range(2):
                    user_status[i + 1] = seconds_to_timedelta(
                        int(user_status[i + 1]))

                hit_data.append(
                    (hit.get_language_pair_display(), hit.get_absolute_url(),
                     hit.hit_id, user_status, annotation_project))

    # Convert total seconds back into datetime.timedelta instances.
    total[1] = seconds_to_timedelta(int(total[2]) / float(int(total[0]) or 1))

    # Remove microseconds to get a nicer timedelta rendering in templates.
    total[1] = total[1] - timedelta(microseconds=total[1].microseconds)

    total[2] = seconds_to_timedelta(int(total[2]))

    groups = _identify_groups_for_user(request.user)
    if len(groups) > 1:
        LOGGER.debug(
            u'User "{0}" assigned to multiple annotation groups: {1}'.format(
                request.user.username or u'Anonymous',
                u', '.join([x.name for x in groups])))

    # Report on the first annotation group whenever the user has one.  The
    # selection must not depend on the "multiple groups" check above, or
    # users with exactly one group would never see their group status.
    group = None
    if len(groups) > 0:
        group = groups[0]

    if group is not None:
        group_name = group.name
        group_status = HIT.compute_status_for_group(group)
        for i in range(2):
            group_status[i + 1] = seconds_to_timedelta(int(group_status[i + 1]))

    else:
        group_status = None
        group_name = None

    LOGGER.debug(u'\n\nHIT data for user "{0}":\n\n{1}\n'.format(
        request.user.username or "Anonymous",
        u'\n'.join([u'{0}\t{1}\t{2}\t{3}'.format(*x) for x in hit_data])))

    # Compute admin URL for super users.
    admin_url = None
    if request.user.is_superuser:
        admin_url = reverse('admin:index')

    dictionary = {
        'active_page': "OVERVIEW",
        'hit_data': hit_data,
        'total': total,
        'group_name': group_name,
        'group_status': group_status,
        'admin_url': admin_url,
        'title': 'WMT16 Dashboard',
        'annotation_groups': [x.name for x in groups],
    }
    dictionary.update(BASE_CONTEXT)

    LOGGER.info(dictionary.values())

    return render(request, 'wmt16/overview.html', dictionary)
# Hotfix potentially wrong ISO codes; we are using ISO-639-3. iso_639_2_to_3_mapping = {'cze': 'ces', 'fre': 'fra', 'ger': 'deu'} for part2_code, part3_code in iso_639_2_to_3_mapping.items(): language_pair = language_pair.replace(part2_code, part3_code) try: _total = _total + 1 _hit_xml = tostring(_child, encoding="utf-8").decode('utf-8') if args.dry_run_enabled: _ = HIT(block_id=block_id, hit_xml=_hit_xml, language_pair=language_pair, mturk_only=args.mturk_only) else: # Use get_or_create() to avoid exact duplicates. We do allow # them for WMT16 to measure intra-annotator agreement... h = HIT(block_id=block_id, hit_xml=_hit_xml, language_pair=language_pair, mturk_only=args.mturk_only) h.save() # pylint: disable-msg=W0703 except Exception, msg: print msg _errors = _errors + 1 print print '[{0}]'.format(_hits_file) print 'Successfully imported {0} HITs, encountered errors for ' \ '{1} HITs.'.format(_total, _errors) print
sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from django.contrib.auth.models import User, Group from appraise.wmt16.models import HIT, Project from appraise.wmt16.views import _identify_groups_for_user # Compute user statistics for all users. user_stats = [] wmt16 = Group.objects.get(name='WMT16') users = wmt16.user_set.all() # Iterate over all users and collect stats for all projects for user in users: for project in Project.objects.all(): _user_stats = HIT.compute_status_for_user(user, project) _name = user.username _email = user.email _project = project.name groups = _identify_groups_for_user(user) _group = "UNDEFINED" if len(groups) > 0: _group = u";".join([g.name for g in groups]) _data = (_name, _email, _project, _group, _user_stats[0], _user_stats[2]) if _data[-2] > 0: user_stats.append(_data) # Sort by research group. user_stats.sort(key=lambda x: x[2])
'fre': 'fra', 'ger': 'deu', 'ron': 'rom', 'tur': 'trk', 'eus': 'baq' } for part2_code, part3_code in iso_639_2_to_3_mapping.items(): language_pair = language_pair.replace(part2_code, part3_code) try: _total = _total + 1 _hit_xml = tostring(_child, encoding="utf-8").decode('utf-8') if args.dry_run_enabled: _ = HIT(block_id=block_id, hit_xml=_hit_xml, language_pair=language_pair, mturk_only=args.mturk_only) else: # Use get_or_create() to avoid exact duplicates. We do allow # them for WMT16 to measure intra-annotator agreement... h = HIT(block_id=block_id, hit_xml=_hit_xml, language_pair=language_pair, mturk_only=args.mturk_only) h.save() # Add HIT instance to given project. project_instance.HITs.add(h) # pylint: disable-msg=W0703
usage: export_wmt16_status.py Exports HIT status for all language pairs. """ from datetime import datetime import os import sys if __name__ == "__main__": # Properly set DJANGO_SETTINGS_MODULE environment variable. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' PROJECT_HOME = os.path.normpath(os.getcwd() + "/..") sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from appraise.wmt16.models import HIT, LANGUAGE_PAIR_CHOICES remaining_hits = {} for language_pair in [x[0] for x in LANGUAGE_PAIR_CHOICES]: remaining_hits[language_pair] = HIT.compute_remaining_hits( language_pair=language_pair) print print '[{0}]'.format(datetime.now().strftime("%c")) for k, v in remaining_hits.items(): print '{0}: {1:03d}'.format(k, v) print
Author: Christian Federmann <*****@*****.**> usage: export_wmt16_status.py Exports HIT status for all language pairs. """ from datetime import datetime import os import sys if __name__ == "__main__": # Properly set DJANGO_SETTINGS_MODULE environment variable. os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' PROJECT_HOME = os.path.normpath(os.getcwd() + "/..") sys.path.append(PROJECT_HOME) # We have just added appraise to the system path list, hence this works. from appraise.wmt16.models import HIT, LANGUAGE_PAIR_CHOICES remaining_hits = {} for language_pair in [x[0] for x in LANGUAGE_PAIR_CHOICES]: remaining_hits[language_pair] = HIT.compute_remaining_hits( language_pair=language_pair) print() print(('[{0}]'.format(datetime.now().strftime("%c")))) for k, v in list(remaining_hits.items()): print(('{0}: {1:03d}'.format(k, v))) print()
def _compute_global_stats():
    """
    Computes some global statistics for the wmt16 evaluation campaign.

    Side effect: marks active, non-MTurk HITs which have at least one
    annotator as completed.

    Returns a list of (label, value) tuples in display order.

    """
    global_stats = []

    wmt16_group = Group.objects.filter(name='wmt16')
    wmt16_users = []
    if wmt16_group.exists():
        wmt16_users = wmt16_group[0].user_set.all()

    # Check how many HITs have been completed.  We now consider a HIT to be
    # completed once it has been annotated by one or more annotators.
    #
    # Before we required `hit.users.count() >= 3` for greater overlap.
    hits_completed = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Check any remaining active HITs which are not yet marked complete.
    for hit in HIT.objects.filter(active=True, mturk_only=False, completed=False):
        if hit.users.count() >= 1:
            hits_completed = hits_completed + 1
            hit.completed = True
            hit.save()

    # Compute remaining HITs for all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Compute number of results contributed so far.
    ranking_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    system_comparisons = 0
    for result in ranking_results:
        result.reload_dynamic_fields()
        # Number of unordered system pairs, n * (n - 1) / 2.  Floor division
        # keeps the counter an integer on Python 3 as well, which the
        # '{0:,}' formatting below relies on.
        #
        # NOTE(review): results with exactly two systems contribute 0 here
        # although they contain one pair -- confirm whether `>= 2` is meant.
        if result.systems > 2:
            system_comparisons += result.systems * (result.systems - 1) // 2

    # Aggregate information about participating groups.  The campaign group
    # itself and the language pair groups ('eng2...', '...2eng') are skipped.
    groups = set()
    for user in wmt16_users:
        for group in user.groups.all():
            if group.name == 'wmt16' or group.name.startswith('eng2') \
              or group.name.endswith('2eng'):
                continue

            groups.add(group)

    # Compute average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    avg_time = total_time / float(hits_completed or 1)
    # NOTE(review): the factor 3 presumably stems from the former requirement
    # of three annotators per HIT -- confirm it is still intended.
    avg_user_time = total_time / float(3 * hits_completed or 1)

    global_stats.append(('Users', len(wmt16_users)))
    global_stats.append(('Groups', len(groups)))
    global_stats.append(('HITs completed', '{0:,}'.format(hits_completed)))
    global_stats.append(('HITs remaining', '{0:,}'.format(hits_remaining)))
    global_stats.append(('Ranking results', '{0:,}'.format(ranking_results.count())))
    global_stats.append(('System comparisons', '{0:,}'.format(system_comparisons)))
    global_stats.append(('Average duration (per HIT)', seconds_to_timedelta(avg_time)))
    global_stats.append(('Average duration (per task)', seconds_to_timedelta(avg_user_time)))
    global_stats.append(('Total duration', seconds_to_timedelta(total_time)))

    return global_stats