def get_status_for_user(self, user=None):
    """
    Returns the status information with respect to the given user.

    The returned list contains a 'done/total' completion string and an
    average-duration string for the given user on this task.
    """
    # pylint: disable-msg=E1101
    _status = []

    # Compute completion status for this task and the given user.
    _items = EvaluationItem.objects.filter(task=self).count()
    _done = NewEvaluationResult.objects.filter(user=user,
      item__task=self).count()
    _status.append('{0}/{1}'.format(_done, _items))

    # Compute average duration for this task and the given user.
    _results = NewEvaluationResult.objects.filter(user=user, item__task=self)
    _durations = _results.values_list('duration', flat=True)
    _durations = [datetime_to_seconds(d) for d in _durations if d]

    # Arithmetic mean of the observed durations, guarding against an
    # empty result set.  (The previous reduce-based folding computed an
    # order-dependent weighted value, not the mean -- e.g. a single
    # duration x came out as x/2.)  This now matches the computation
    # used by the sibling status methods.
    _average_duration = sum(_durations) / (float(len(_durations)) or 1)
    _status.append('{:.2f} sec'.format(_average_duration))

    return _status
def compute_status_for_user(cls, user, language_pair=None):
    """
    Computes the HIT completion status for the given user.

    If language_pair is given, it constraints on the HITs' language pair.

    Returns a list containing:

    - number of completed HITs;
    - average duration per HIT in seconds;
    - total duration in seconds.
    """
    hits_qs = cls.objects.filter(users=user)
    if language_pair:
        hits_qs = hits_qs.filter(language_pair=language_pair)

    _completed_hits = hits_qs.count()

    # Collect all durations with a single query instead of issuing one
    # query per HIT (the previous loop was an N+1 query pattern).  The
    # aggregate is unchanged since we only sum the values.
    _results = RankingResult.objects.filter(user=user, item__hit__in=hits_qs)
    _durations = _results.values_list('duration', flat=True)
    _durations = [datetime_to_seconds(d) for d in _durations if d]

    _total_duration = sum(_durations)
    # Guard against division by zero when the user has no completed HITs.
    _average_duration = _total_duration / float(_completed_hits or 1)

    current_status = []
    current_status.append(_completed_hits)
    current_status.append(_average_duration)
    current_status.append(_total_duration)

    return current_status
def get_status_for_user(self, user=None):
    """
    Returns the status information with respect to the given user.

    The returned list contains a 'done/total' completion string and an
    average-duration string for the given user on this task.
    """
    # pylint: disable-msg=E1101
    _status = []

    # Compute completion status for this task and the given user.
    _items = EvaluationItem.objects.filter(task=self).count()
    _done = NewEvaluationResult.objects.filter(user=user,
      item__task=self).count()
    _status.append('{0}/{1}'.format(_done, _items))

    # Compute average duration for this task and the given user.
    _results = NewEvaluationResult.objects.filter(user=user, item__task=self)
    _durations = _results.values_list('duration', flat=True)
    _durations = [datetime_to_seconds(d) for d in _durations if d]

    # Arithmetic mean of the observed durations, guarding against an
    # empty result set.  (The previous reduce-based folding computed an
    # order-dependent weighted value, not the mean -- e.g. a single
    # duration x came out as x/2.)  This now matches the computation
    # used by the sibling status methods.
    _average_duration = sum(_durations) / (float(len(_durations)) or 1)
    _status.append('{:.2f} sec'.format(_average_duration))

    return _status
def _compute_global_stats():
    """
    Computes some global statistics for the WMT14 evaluation campaign.
    """
    wmt14_group = Group.objects.get(name='WMT14')
    annotators = wmt14_group.user_set.all()

    # A HIT counts as completed once one or more annotators have worked
    # on it.  (Previously `hit.users.count() >= 3` was required for
    # greater overlap.)
    completed_count = HIT.objects.filter(mturk_only=False,
      completed=True).count()

    # Promote any still-active HITs that already have an annotator but
    # are not yet flagged as complete.
    for pending_hit in HIT.objects.filter(active=True, mturk_only=False,
      completed=False):
        if pending_hit.users.count() >= 1:
            completed_count += 1
            pending_hit.completed = True
            pending_hit.save()

    # Remaining HITs across all language pairs.
    remaining_count = HIT.compute_remaining_hits()

    # Number of ranking results contributed so far.
    result_count = RankingResult.objects.filter(item__hit__completed=True,
      item__hit__mturk_only=False).count()

    # Participating research groups; the campaign group itself and the
    # per-language-direction groups are excluded.
    participating_groups = set()
    for annotator in annotators:
        for candidate in annotator.groups.all():
            if (candidate.name == 'WMT14'
                    or candidate.name.startswith('eng2')
                    or candidate.name.endswith('2eng')):
                continue
            participating_groups.add(candidate)

    # Average/total duration over all results.
    all_durations = RankingResult.objects.all().values_list('duration',
      flat=True)
    total_seconds = sum([datetime_to_seconds(x) for x in all_durations])
    per_hit_seconds = total_seconds / float(completed_count or 1)
    per_user_seconds = total_seconds / float(3 * completed_count or 1)

    global_stats = [
      ('Users', annotators.count()),
      ('Groups', len(participating_groups)),
      ('HITs completed', completed_count),
      ('HITs remaining', remaining_count),
      ('Ranking results', result_count),
      ('System comparisons', 10 * result_count),
      ('Average duration', seconds_to_timedelta(per_hit_seconds)),
      ('Average duration (single user)',
       seconds_to_timedelta(per_user_seconds)),
      ('Total duration', seconds_to_timedelta(total_seconds)),
    ]
    return global_stats
def get_status_for_user(self, user=None):
    """
    Returns the status information with respect to the given user.
    """
    # pylint: disable-msg=E1101
    _task_type = self.get_task_type_display()
    status_info = []

    # Completion status: "done/total" string, percentage value, and a
    # CSS class name for the progress bar colour.
    total_items = EvaluationItem.objects.filter(task=self).count()
    completed_items = EvaluationResult.objects.filter(user=user,
      item__task=self).count()
    status_info.append('{0}/{1}'.format(completed_items, total_items))

    percentage = 100 * completed_items / float(total_items or 1)
    status_info.append(percentage)
    if percentage < 33:
        status_info.append(' progress-danger')
    elif percentage < 66:
        status_info.append(' progress-warning')
    else:
        status_info.append(' progress-success')

    # Average annotation duration for this task and the given user.
    user_results = EvaluationResult.objects.filter(user=user, item__task=self)
    raw_durations = user_results.values_list('duration', flat=True)
    seconds = [datetime_to_seconds(d) for d in raw_durations if d]
    mean_seconds = sum(seconds) / (float(len(seconds)) or 1)
    status_info.append('{:.2f} sec'.format(mean_seconds))

    # We could add task type specific status information here.
    if _task_type in ('Quality Checking', 'Ranking', 'Post-editing',
      'Error classification', '3-Way Ranking', 'Gisting',
      'Document-level gisting'):
        pass

    return status_info
def status(request):
    """
    Renders the status overview.
    """
    LOGGER.info('Rendering WMT13 HIT status for user "{0}".'.format(
      request.user.username or "Anonymous"))

    # Global statistics for the WMT13 campaign.
    wmt13_group = Group.objects.get(name="WMT13")
    wmt13_users = wmt13_group.user_set.all()

    # A HIT counts as completed once three or more users annotated it.
    completed_count = 0
    for current_hit in HIT.objects.all():
        if current_hit.users.count() >= 3:
            completed_count += 1

    # Remaining HITs across all language pairs.
    remaining_count = HIT.compute_remaining_hits()

    # Number of ranking results contributed so far.
    result_count = RankingResult.objects.all().count()

    # Participating groups, excluding the campaign group itself and the
    # per-language-direction groups.
    participating = set()
    for current_user in wmt13_users:
        for current_group in current_user.groups.all():
            if (current_group.name == "WMT13"
                    or current_group.name.startswith("eng2")
                    or current_group.name.endswith("2eng")):
                continue
            participating.add(current_group)

    # Average/total duration over all results.
    durations = RankingResult.objects.all().values_list("duration", flat=True)
    total_seconds = sum([datetime_to_seconds(x) for x in durations])
    average_seconds = total_seconds / float(completed_count or 1)

    global_stats = []
    global_stats.append(("Users", wmt13_users.count()))
    global_stats.append(("Groups", len(participating)))
    global_stats.append(("HITs completed", completed_count))
    global_stats.append(("HITs remaining", remaining_count))
    global_stats.append(("Ranking results", result_count))
    global_stats.append(("System comparisons", 10 * result_count))
    global_stats.append(("Average duration",
      seconds_to_timedelta(average_seconds)))
    global_stats.append(("Total duration",
      seconds_to_timedelta(total_seconds)))

    context = {
      "active_page": "STATUS",
      "global_stats": global_stats,
      "commit_tag": COMMIT_TAG,
      "title": "WMT13 Status",
    }
    return render(request, "wmt13/status.html", context)
def get_status_for_user(self, user=None):
    """
    Returns the status information with respect to the given user.
    """
    # pylint: disable-msg=E1101
    task_type_name = self.get_task_type_display()
    result = []

    # Completion status for this task and the given user.
    item_total = EvaluationItem.objects.filter(task=self).count()
    item_done = EvaluationResult.objects.filter(user=user,
      item__task=self).count()
    result.append('{0}/{1}'.format(item_done, item_total))

    # Progress percentage plus the matching progress-bar CSS class.
    progress = 100 * item_done / float(item_total or 1)
    result.append(progress)
    if progress < 33:
        result.append(' progress-danger')
    elif progress < 66:
        result.append(' progress-warning')
    else:
        result.append(' progress-success')

    # Average duration for this task and the given user.
    duration_values = EvaluationResult.objects.filter(user=user,
      item__task=self).values_list('duration', flat=True)
    duration_seconds = [datetime_to_seconds(d) for d in duration_values if d]
    average = sum(duration_seconds) / (float(len(duration_seconds)) or 1)
    result.append('{:.2f} sec'.format(average))

    # We could add task type specific status information here.
    if task_type_name in ('Quality Checking', 'Ranking', 'Post-editing',
      'Error classification', '3-Way Ranking', 'Error Correction Ranking'):
        pass

    return result
def export_to_ranking_csv(self):
    """
    Exports this RankingResult as a list of Ranking CSV field values.

    Field order:

      ID,srcLang,tgtLang,user,duration,rank_1,word_count_1,rank_2,
      word_count_2,...

    with one (rank, word_count) pair per candidate translation.  When
    the item was skipped (no results), no rank/word-count pairs are
    appended.
    """
    ranking_csv_data = []

    # The segment ID lives in the source attributes; fall back to -1
    # when it is missing or malformed.  Narrowed from a bare `except:`,
    # which also swallowed KeyboardInterrupt/SystemExit.
    try:
        ranking_csv_data.append(self.item.source[1]["id"])
    except (IndexError, KeyError, TypeError):
        ranking_csv_data.append(-1)

    # Maps ISO 639-3 (and legacy bibliographic) codes to language names.
    iso639_3_to_name_mapping = {'ces': 'Czech', 'cze': 'Czech',
      'deu': 'German', 'ger': 'German', 'eng': 'English',
      'spa': 'Spanish', 'fra': 'French', 'fre': 'French',
      'rus': 'Russian', 'fin': 'Finnish'}

    _src_lang = self.item.hit.hit_attributes['source-language']
    _trg_lang = self.item.hit.hit_attributes['target-language']

    ranking_csv_data.append(iso639_3_to_name_mapping[_src_lang]) # srclang
    ranking_csv_data.append(iso639_3_to_name_mapping[_trg_lang]) # trglang

    ranking_csv_data.append(self.user.username)
    ranking_csv_data.append(datetime_to_seconds(self.duration))

    # A skipped item has no stored results; emit no rank columns then.
    skipped = self.results is None

    translations = []
    if not skipped:
        for index, translation in enumerate(self.item.translations):
            _word_count = len(translation[0].split())
            _rank = self.results[index]
            translations.append((_rank, _word_count))

    for rank, word_count in translations:
        ranking_csv_data.append(rank)
        ranking_csv_data.append(word_count)

    return ranking_csv_data
def export_to_ranking_csv(self):
    """
    Renders this RankingResult as Ranking CSV String.

    Format:
      ID,srcLang,tgtLang,user,duration,rank_1,word_count_1,...
    """
    # TODO: this needs to be cleaned up...
    # We'd like to have a minimal version of the ranking CSV output.
    # Not sure why this one generates ranks and word counts... :)
    raise NotImplementedError("not ready yet")

    # NOTE: everything below is unreachable template code kept for the
    # eventual implementation (see TODO above).
    fields = []

    # Segment ID from the source attributes, -1 when unavailable.
    try:
        fields.append(self.item.source[1]["id"])
    except:
        fields.append(-1)

    src_code = self.item.hit.hit_attributes['source-language']
    trg_code = self.item.hit.hit_attributes['target-language']

    fields.append(ISO639_3_TO_NAME_MAPPING[src_code]) # srclang
    fields.append(ISO639_3_TO_NAME_MAPPING[trg_code]) # trglang

    fields.append(self.user.username)
    fields.append(str(datetime_to_seconds(self.duration)))

    # A skipped item has no stored results.
    was_skipped = self.results is None

    rank_pairs = []
    if not was_skipped:
        for position, candidate in enumerate(self.item.translations):
            rank_pairs.append((self.results[position],
              len(candidate[0].split())))

    for rank_value, token_count in rank_pairs:
        fields.append(str(rank_value))
        fields.append(str(token_count))

    return u",".join(fields)
def get_status_for_users(self):
    """
    Returns the status information with respect to all users.
    """
    overall = []

    # Completion status across all assigned users; the user with the
    # fewest completed items determines the reported progress.
    total_items = EvaluationItem.objects.filter(task=self).count()
    per_user_counts = [
      EvaluationResult.objects.filter(user=u, item__task=self).count()
      for u in self.users.all()]
    minimum_done = min(per_user_counts or [0])
    overall.append('{0}/{1}'.format(minimum_done, total_items))

    ratio = 100 * minimum_done / float(total_items or 1)
    overall.append(ratio)
    if ratio < 33:
        overall.append(' progress-danger')
    elif ratio < 66:
        overall.append(' progress-warning')
    else:
        overall.append(' progress-success')

    # Average duration pooled over all assigned users.
    pooled_durations = []
    for u in self.users.all():
        pooled_durations.extend(EvaluationResult.objects.filter(user=u,
          item__task=self).values_list('duration', flat=True))
    seconds = [datetime_to_seconds(d) for d in pooled_durations if d]
    mean_seconds = sum(seconds) / (float(len(seconds)) or 1)
    overall.append('{:.2f} sec'.format(mean_seconds))

    return overall
def get_status_for_users(self):
    """
    Returns the status information with respect to all users.
    """
    summary = []

    # Completion status for this task across every assigned user; the
    # minimal number of completed items counts here.
    item_count = EvaluationItem.objects.filter(task=self).count()
    completions = []
    for assigned_user in self.users.all():
        completions.append(EvaluationResult.objects.filter(
          user=assigned_user, item__task=self).count())
    least_done = min(completions or [0])
    summary.append('{0}/{1}'.format(least_done, item_count))

    pct = 100 * least_done / float(item_count or 1)
    summary.append(pct)
    # Progress-bar CSS class for the computed percentage.
    if pct < 33:
        summary.append(' progress-danger')
    elif pct < 66:
        summary.append(' progress-warning')
    else:
        summary.append(' progress-success')

    # Average duration over the results of all assigned users.
    collected = []
    for assigned_user in self.users.all():
        user_results = EvaluationResult.objects.filter(user=assigned_user,
          item__task=self)
        collected.extend(user_results.values_list('duration', flat=True))
    collected = [datetime_to_seconds(d) for d in collected if d]
    mean_duration = sum(collected) / (float(len(collected)) or 1)
    summary.append('{:.2f} sec'.format(mean_duration))

    return summary
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.
    """
    global_stats = []
    wmt16_group = Group.objects.filter(name='WMT16')
    wmt16_users = _get_active_users_for_group(wmt16_group)

    # A HIT is considered complete once one or more annotators have
    # worked on it.  (Previously `hit.users.count() >= 3` was required
    # for greater overlap.)
    hits_completed = HIT.objects.filter(mturk_only=False,
      completed=True).count()

    # Flag any still-active HITs that already have an annotator.
    for open_hit in HIT.objects.filter(active=True, mturk_only=False,
      completed=False):
        if open_hit.users.count() >= 1:
            hits_completed += 1
            open_hit.completed = True
            open_hit.save()

    # Remaining HITs across all language pairs.
    hits_remaining = HIT.compute_remaining_hits()

    # Results contributed so far.
    ranking_results = RankingResult.objects.filter(item__hit__completed=True,
      item__hit__mturk_only=False)

    from math import factorial
    system_comparisons = 0
    for ranking in ranking_results:
        ranking.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        if ranking.systems > 2:
            pairwise = factorial(ranking.systems) / (
              factorial(ranking.systems - 2) * 2)
        else:
            pairwise = 0
        system_comparisons += pairwise

    # Participating research groups.
    groups = set()
    for wmt16_user in wmt16_users:
        groups.update(_identify_groups_for_user(wmt16_user))

    # Average/total duration over all results.
    durations = RankingResult.objects.all().values_list('duration', flat=True)
    total_time = sum([datetime_to_seconds(x) for x in durations])
    avg_time = total_time / float(hits_completed or 1)
    avg_user_time = total_time / float(3 * hits_completed or 1)

    for label, value in (
      ('Users', len(wmt16_users)),
      ('Groups', len(groups)),
      ('HITs completed', '{0:,}'.format(hits_completed)),
      ('HITs remaining', '{0:,}'.format(hits_remaining)),
      ('Ranking results', '{0:,}'.format(ranking_results.count())),
      ('System comparisons', '{0:,}'.format(system_comparisons)),
      ('Average duration (per HIT)', seconds_to_timedelta(avg_time)),
      ('Average duration (per task)', seconds_to_timedelta(avg_user_time)),
      ('Total duration', seconds_to_timedelta(total_time))):
        global_stats.append((label, value))

    # Create new status data snapshot
    snapshot = (
      ('users', str(len(wmt16_users))),
      ('groups', str(len(groups))),
      ('hits_completed', str(hits_completed)),
      ('hits_remaining', str(hits_remaining)),
      ('ranking_results', str(ranking_results.count())),
      ('system_comparisons', str(system_comparisons)),
      ('duration_per_hit', str(seconds_to_timedelta(avg_time))),
      ('duration_per_task', str(seconds_to_timedelta(avg_user_time))),
      ('duration_total', str(seconds_to_timedelta(total_time))))
    for snapshot_key, snapshot_value in snapshot:
        TimedKeyValueData.update_status_if_changed(snapshot_key,
          snapshot_value)

    return global_stats
def _compute_global_stats():
    """
    Computes some global statistics for the WMT16 evaluation campaign.
    """
    stats = []
    campaign_group = Group.objects.filter(name='WMT16')
    campaign_users = _get_active_users_for_group(campaign_group)

    # A HIT is complete once it has been annotated by one or more
    # annotators.  (Previously `hit.users.count() >= 3` was required
    # for greater overlap.)
    completed_total = HIT.objects.filter(mturk_only=False,
      completed=True).count()

    # Mark any remaining active HITs which are not yet flagged complete
    # but already have at least one annotator.
    for candidate_hit in HIT.objects.filter(active=True, mturk_only=False,
      completed=False):
        if candidate_hit.users.count() >= 1:
            completed_total += 1
            candidate_hit.completed = True
            candidate_hit.save()

    remaining_total = HIT.compute_remaining_hits()

    # Results contributed so far.
    collected_results = RankingResult.objects.filter(
      item__hit__completed=True, item__hit__mturk_only=False)

    from math import factorial
    comparison_total = 0
    for single_result in collected_results:
        single_result.reload_dynamic_fields()
        # TODO: this implicitly counts A=B comparisons for multi systems.
        # Basically, inflating the number of pairwise comparisons... Fix!
        n_systems = single_result.systems
        comparison_total += (factorial(n_systems) /
          (factorial(n_systems - 2) * 2)) if n_systems > 2 else 0

    # Aggregate information about participating groups.
    group_set = set()
    for campaign_user in campaign_users:
        for member_group in _identify_groups_for_user(campaign_user):
            group_set.add(member_group)

    # Average/total duration over all results.
    all_durations = RankingResult.objects.all().values_list('duration',
      flat=True)
    duration_total = sum([datetime_to_seconds(x) for x in all_durations])
    per_hit = duration_total / float(completed_total or 1)
    per_task = duration_total / float(3 * completed_total or 1)

    stats.append(('Users', len(campaign_users)))
    stats.append(('Groups', len(group_set)))
    stats.append(('HITs completed', '{0:,}'.format(completed_total)))
    stats.append(('HITs remaining', '{0:,}'.format(remaining_total)))
    stats.append(('Ranking results',
      '{0:,}'.format(collected_results.count())))
    stats.append(('System comparisons', '{0:,}'.format(comparison_total)))
    stats.append(('Average duration (per HIT)',
      seconds_to_timedelta(per_hit)))
    stats.append(('Average duration (per task)',
      seconds_to_timedelta(per_task)))
    stats.append(('Total duration', seconds_to_timedelta(duration_total)))

    # Create new status data snapshot
    TimedKeyValueData.update_status_if_changed('users',
      str(len(campaign_users)))
    TimedKeyValueData.update_status_if_changed('groups', str(len(group_set)))
    TimedKeyValueData.update_status_if_changed('hits_completed',
      str(completed_total))
    TimedKeyValueData.update_status_if_changed('hits_remaining',
      str(remaining_total))
    TimedKeyValueData.update_status_if_changed('ranking_results',
      str(collected_results.count()))
    TimedKeyValueData.update_status_if_changed('system_comparisons',
      str(comparison_total))
    TimedKeyValueData.update_status_if_changed('duration_per_hit',
      str(seconds_to_timedelta(per_hit)))
    TimedKeyValueData.update_status_if_changed('duration_per_task',
      str(seconds_to_timedelta(per_task)))
    TimedKeyValueData.update_status_if_changed('duration_total',
      str(seconds_to_timedelta(duration_total)))

    return stats
def _compute_global_stats():
    """
    Computes some global statistics for the WMT15 evaluation campaign.
    """
    wmt15_group = Group.objects.filter(name='WMT15')
    wmt15_users = []
    if wmt15_group.exists():
        wmt15_users = wmt15_group[0].user_set.all()

    # A HIT is complete once one or more annotators have worked on it.
    # (Previously `hit.users.count() >= 3` was required for overlap.)
    done_count = HIT.objects.filter(mturk_only=False, completed=True).count()

    # Mark active HITs with at least one annotator as complete.
    for unfinished in HIT.objects.filter(active=True, mturk_only=False,
      completed=False):
        if unfinished.users.count() >= 1:
            done_count += 1
            unfinished.completed = True
            unfinished.save()

    # Remaining HITs across all language pairs.
    open_count = HIT.compute_remaining_hits()

    # Results contributed so far.
    result_set = RankingResult.objects.filter(item__hit__completed=True,
      item__hit__mturk_only=False)

    from math import factorial
    comparison_count = 0
    for current_result in result_set:
        current_result.reload_dynamic_fields()
        if current_result.systems > 2:
            pair_count = factorial(current_result.systems) / (
              factorial(current_result.systems - 2) * 2)
        else:
            pair_count = 0
        comparison_count += pair_count

    # Participating groups; the campaign group itself and the
    # per-language-direction groups are excluded.
    group_set = set()
    for member in wmt15_users:
        for member_group in member.groups.all():
            if (member_group.name == 'WMT15'
                    or member_group.name.startswith('eng2')
                    or member_group.name.endswith('2eng')):
                continue
            group_set.add(member_group)

    # Average/total duration over all results.
    duration_values = RankingResult.objects.all().values_list('duration',
      flat=True)
    total_seconds = sum([datetime_to_seconds(x) for x in duration_values])
    per_hit_seconds = total_seconds / float(done_count or 1)
    per_task_seconds = total_seconds / float(3 * done_count or 1)

    global_stats = [
      ('Users', len(wmt15_users)),
      ('Groups', len(group_set)),
      ('HITs completed', '{0:,}'.format(done_count)),
      ('HITs remaining', '{0:,}'.format(open_count)),
      ('Ranking results', '{0:,}'.format(result_set.count())),
      ('System comparisons', '{0:,}'.format(comparison_count)),
      ('Average duration (per HIT)', seconds_to_timedelta(per_hit_seconds)),
      ('Average duration (per task)',
       seconds_to_timedelta(per_task_seconds)),
      ('Total duration', seconds_to_timedelta(total_seconds)),
    ]
    return global_stats