def view(self, request):
    """Get statistics for the website.

    Aggregates the minimum and maximum of every field listed in
    ``VIDEO_FIELDS`` over all videos and reports the overall extremes,
    together with the number of certified active experts, expert ratings
    and videos.

    Returns:
        A ``Response`` wrapping the data serialized by
        ``StatisticsSerializerV2``.
    """
    minmax_scores = Video.objects.aggregate(
        **{'max_' + f: Max(F(f)) for f in VIDEO_FIELDS},
        **{'min_' + f: Min(F(f)) for f in VIDEO_FIELDS})

    # An aggregate is None when no row contributed a value. Drop those
    # explicitly instead of relying on a blanket ``except``: a single None
    # must not zero out the real bounds, and unrelated errors must surface.
    min_candidates = [v for k, v in minmax_scores.items()
                      if k.startswith('min') and v is not None]
    max_candidates = [v for k, v in minmax_scores.items()
                      if k.startswith('max') and v is not None]

    # 0.0 is reported when there is nothing to aggregate.
    min_score = min(min_candidates, default=0.0)
    max_score = max(max_candidates, default=0.0)

    data = {
        'certified_experts': UserInformation._annotate_is_certified(
            UserInformation.objects.all()).filter(
                _is_certified=1, user__is_active=True).count(),
        'pairwise_comparisons': ExpertRating.objects.all().count(),
        'videos': Video.objects.all().count(),
        'min_score': min_score,
        'max_score': max_score
    }

    return Response(StatisticsSerializerV2(data, many=False).data)
def sample_certified_users_with_video(video_pks, max_other_users=1, feature_nonan=None,
                                      always_include_username=None):
    """Get a set of usernames which have most ratings on a particular list of videos.

    Args:
        video_pks: iterable of primary keys of the videos of interest.
        max_other_users: maximal number of users to select, not counting
            ``always_include_username``.
        feature_nonan: optional name of a feature; when given, only expert
            ratings and video ratings with a non-null value for this
            feature are counted. When ``None``, no such restriction applies.
        always_include_username: optional username that is always part of
            the result and does not count towards ``max_other_users``.

    Returns:
        A set of usernames.
    """
    qs = UserPreferences.objects.all()

    # removing myself to get more others (will add later)
    if always_include_username is not None:
        qs = qs.exclude(user__username=always_include_username)

    # filter for expert ratings: the rating must involve one of the videos
    # (starts from a condition that matches nothing, then ORs the videos in)
    expertrating_filter = Q(pk__in=[])
    for video_pk in video_pks:
        expertrating_filter = expertrating_filter | Q(expertrating__video_1__pk=video_pk)
        expertrating_filter = expertrating_filter | Q(expertrating__video_2__pk=video_pk)

    # exclude ratings with a NaN value
    if feature_nonan is not None:
        expertrating_filter = expertrating_filter & Q(
            **{'expertrating__' + feature_nonan + '__isnull': False})

    qs = UserInformation._annotate_is_certified(qs, prefix='user__userinformation__')
    qs = qs.filter(_is_certified=True)

    qs = qs.annotate(_rating_count=Count('expertrating', filter=expertrating_filter,
                                         distinct=True))

    filter_rating_exists = Q(pk__in=[])
    for video_pk in video_pks:
        video_rating_condition = {'videorating__video__pk': video_pk}
        # The non-null restriction only makes sense when a feature is given;
        # concatenating None into the lookup name would raise a TypeError.
        if feature_nonan is not None:
            video_rating_condition['videorating__' + feature_nonan + '__isnull'] = False

        qs = qs.annotate(**{
            f'_v_{video_pk}_nonnull': Count('videorating',
                                            filter=Q(**video_rating_condition))
        })
        filter_rating_exists = filter_rating_exists | Q(**{f'_v_{video_pk}_nonnull__gt': 0})

    # at least one score must be present for any of the videos
    qs = qs.filter(filter_rating_exists)

    # expert ratings must exist
    qs = qs.filter(_rating_count__gt=0)

    # selecting by top ratings
    qs = qs.order_by('-_rating_count')[:max_other_users]

    usernames_set = set(qs.values_list('user__username', flat=True))

    if always_include_username is not None:
        usernames_set.add(always_include_username)

    return usernames_set
def view(self, request):
    """Get statistics for the website.

    Reports the overall minimum/maximum over the ``VIDEO_FIELDS`` of all
    videos, the counts of certified active experts, expert ratings, videos,
    non-demo users, ratings edited within the last 7 days and rated videos,
    plus the number of comparisons per field and their sum.

    Returns:
        A ``Response`` wrapping the data serialized by
        ``StatisticsSerializerV2``.
    """
    minmax_scores = Video.objects.aggregate(
        **{'max_' + f: Max(F(f)) for f in VIDEO_FIELDS},
        **{'min_' + f: Min(F(f)) for f in VIDEO_FIELDS})

    # An aggregate is None when no row contributed a value. Drop those
    # explicitly instead of relying on a blanket ``except``: a single None
    # must not zero out the real bounds, and unrelated errors must surface.
    min_candidates = [v for k, v in minmax_scores.items()
                      if k.startswith('min') and v is not None]
    max_candidates = [v for k, v in minmax_scores.items()
                      if k.startswith('max') and v is not None]

    # 0.0 is reported when there is nothing to aggregate.
    min_score = min(min_candidates, default=0.0)
    max_score = max(max_candidates, default=0.0)

    date_week_ago = make_aware(
        datetime.datetime.now()) - datetime.timedelta(days=7)

    data = {
        'certified_experts': UserInformation._annotate_is_certified(
            UserInformation.objects.all()).filter(
                _is_certified=1, user__is_active=True).count(),
        'pairwise_comparisons': ExpertRating.objects.all().count(),
        'videos': Video.objects.all().count(),
        'min_score': min_score,
        'max_score': max_score,
        'total_experts': UserInformation.objects.filter(is_demo=False).count(),
        'weekly_active_ratings': ExpertRating.objects.filter(
            datetime_lastedit__gte=date_week_ago).count(),
        # a video is "rated" unless it appears in no rating on either side
        'n_rated_videos': Video.objects.exclude(
            Q(expertrating_video_1__id=None) & Q(expertrating_video_2__id=None)
        ).distinct().count()
    }

    # per-field comparison counts: the value is set and its weight is positive
    n_sum_comparisons = 0
    for f in VIDEO_FIELDS:
        val = ExpertRating.objects.filter(**{
            f + '__isnull': False,
            f + '_weight__gt': 0
        }).distinct().count()
        data[f"n_{f}_comparisons"] = val
        n_sum_comparisons += val
    data["n_sum_comparisons"] = n_sum_comparisons

    return Response(StatisticsSerializerV2(data, many=False).data)
def thank_contributors(self, request):
    """Thank contributors for the video.

    Query parameters:
        video_id: identifier of the video; a 404 is raised when no such
            video exists.
        action: ``'thank'`` creates a thank-you entry from the requesting
            user to every other certified user who has an expert rating
            involving the video; ``'unthank'`` deletes the requesting user's
            thank-you entries for the video.

    Returns:
        A ``Response`` with status 201 on success (including the number of
        deleted entries for ``'unthank'``), or status 400 for an unknown
        action.
    """
    video = get_object_or_404(Video, video_id=request.query_params.get(
        'video_id', ''))
    action = request.query_params.get('action', "")
    user = get_object_or_404(UserPreferences, user__username=request.user.username)

    if action == 'unthank':
        n_deleted, _ = VideoRatingThankYou.objects.filter(
            thanks_from=user, video=video).delete()
        return Response({
            'status': 'deleted',
            'n_deleted': n_deleted
        }, status=201)
    elif action == 'thank':
        qs = UserPreferences.objects.all()

        # only keeping people who rated the video
        # NOTE: the condition is passed as ``filter=`` explicitly; on Django
        # versions where Count's second positional parameter is ``distinct``
        # a positional Q would be silently ignored as a filter.
        qs = qs.annotate(n_video=Count(
            'expertrating',
            filter=Q(expertrating__video_1=video) | Q(expertrating__video_2=video)))
        qs = qs.filter(n_video__gte=1)

        # and who are certified
        qs = UserInformation._annotate_is_certified(
            qs, prefix="user__userinformation__")
        qs = qs.filter(_is_certified=True)

        # removing yourself...
        qs = qs.exclude(id=user.id)

        contributors = qs.distinct()

        entries = [
            VideoRatingThankYou(thanks_from=user,
                                video=video,
                                thanks_to=contributor)
            for contributor in contributors
        ]
        # ignore_conflicts: entries that conflict with existing rows are skipped
        VideoRatingThankYou.objects.bulk_create(entries,
                                                ignore_conflicts=True)
    else:
        return Response({'reason': f'Wrong action [{action}]'},
                        status=400)

    return Response({'status': 'success'}, status=201)
def search_username(self, request):
    """Search certified users by a case-insensitive username substring.

    Query parameters:
        search_query: substring to look for in usernames (default ``''``).
        limit: maximal number of results, a non-negative integer
            (default 20).

    Returns:
        A ``Response`` with the matching usernames ordered alphabetically
        in ``results`` and their number in ``count``; ``previous`` and
        ``next`` are always ``None``. A status-400 ``Response`` with a
        ``reason`` is returned when ``limit`` is not a non-negative integer.
    """
    search_query = request.query_params.get('search_query', '')

    try:
        limit = int(request.query_params.get('limit', 20))
        # Explicit check rather than ``assert``: assertions are stripped
        # when Python runs with -O, which would let negative limits through.
        # The message is kept as-is (note that 0 is accepted).
        if limit < 0:
            raise ValueError("Limit must be positive")
    except (TypeError, ValueError) as e:
        return Response({'reason': str(e)}, status=400)

    qs = UserInformation.objects.all()
    qs = UserInformation._annotate_is_certified(qs)
    qs = qs.filter(_is_certified=True)
    qs = qs.filter(user__username__icontains=search_query)
    qs = qs.order_by('user__username')
    qs = qs[:limit]

    serializer = OnlyUsernameSerializer(qs, many=True)
    return Response({
        'results': serializer.data,
        'count': len(qs),
        'previous': None,
        'next': None
    })
def get_public_append_only_database_as_pd():
    """Build the public append-only database as a dict of data frames.

    Returns:
        A dict with three entries, each produced by ``read_frame``:
        ``'all_video_scores'`` (all videos with an annotated ``score`` and
        every field of ``VIDEO_FIELDS``), ``'comparison_database'``
        (historical expert ratings for which both videos are annotated as
        public) and ``'contributors_public'`` (non-demo users, with columns
        depending on their visibility settings).
    """

    # HACK: make django-pandas work with annotations by aliasing the
    # exception class where it looks for it.
    # see https://github.com/chrisdev/django-pandas/blob/master/django_pandas/io.py
    # see https://github.com/chrisdev/django-pandas/issues/124
    # TODO: fix it
    import django
    django.db.models.fields.FieldDoesNotExist = django.core.exceptions.FieldDoesNotExist

    tables = {}

    # --- all videos with the tournesol score and all criteria ---
    default_features = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]
    scored_videos = Video.objects.all().annotate(
        score=get_score_annotation(default_features))
    tables['all_video_scores'] = read_frame(
        scored_videos,
        fieldnames=['id', 'video_id', 'score'] + VIDEO_FIELDS)

    # --- all history for ratings, with both videos rated publicly ---
    history = HistoricalExpertRating.objects.all()
    for side in ('1', '2'):
        history = VideoRatingPrivacy._annotate_privacy(
            history,
            prefix=f'video_{side}__videoratingprivacy',
            output_prefix=f"_v{side}")
    history = history.filter(_v1_is_public=True, _v2_is_public=True)

    comparison_columns = [
        'id', 'duration_ms', 'datetime_lastedit', 'datetime_add',
        *VIDEO_FIELDS,
        *[x + '_weight' for x in VIDEO_FIELDS],
        'user__user__username',
        'video_1__video_id', 'video_2__video_id',
        'history_id', 'history_date', 'history_change_reason', 'history_type',
    ]
    tables['comparison_database'] = read_frame(history, fieldnames=comparison_columns)

    # --- all user data (without demo accounts), with the _is_certified field ---
    contributors = UserInformation._annotate_is_certified(
        UserInformation.objects.all().filter(is_demo=False))

    # Export rules:
    #  * even if 'show my profile' is false, export 'username';
    #  * if 'show my profile' is true, export 'First name', 'Last name',
    #    'Title', 'Bio';
    #  * if 'show online presence' is true, export 'Website', 'Linkedin',
    #    'Youtube', 'Google scholar', 'Orcid', 'Researchgate', 'Twitter'.
    # Do NOT share demographic data.
    basic_columns = UserInformation.BASIC_FIELDS + ['_is_certified']
    profile_columns = UserInformation.PROFILE_FIELDS
    online_columns = UserInformation.ONLINE_FIELDS

    # (queryset filter, exported columns) for every visibility tier
    visibility_tiers = [
        # only username
        ({'show_my_profile': False},
         basic_columns),
        # username and info
        ({'show_my_profile': True, 'show_online_presence': False},
         basic_columns + profile_columns),
        # username, info and online fields
        ({'show_my_profile': True, 'show_online_presence': True},
         basic_columns + profile_columns + online_columns),
    ]
    tier_frames = [
        read_frame(contributors.filter(**conditions), fieldnames=columns)
        for conditions, columns in visibility_tiers
    ]

    # all contributors
    tables['contributors_public'] = pd.concat(tier_frames, axis=0, ignore_index=True)

    return tables
def test_download_privacy_public_database(driver, django_db_blocker):
    """Test that public database is a zip archive, and it only contains public info.

    Downloads the archive through the link on the web page, parses every
    file in it as CSV and checks that:
      * exactly the three expected files are present;
      * for every exported comparison both videos are annotated as public
        for the rating user;
      * for every exported contributor the certification flag matches the
        database, hidden profile/online fields are empty and protected
        fields are absent.
    """
    create_toy_data(django_db_blocker=django_db_blocker, driver=driver,
                    n_users=30, n_videos=100, n_ratings=30)

    open_tournesol(driver)

    # ``until`` returns the located element, so no second lookup through
    # the ``find_element_by_id`` helper (removed in Selenium 4) is needed.
    download_element = WebDriverWait(driver, TIME_WAIT).until(
        EC.presence_of_element_located((By.ID, "id_public_database_download")))
    link = download_element.get_attribute('href')

    data = get(link)
    assert data.ok
    assert data.content
    assert data.headers['content-type'] == 'application/zip'

    # reading dataframes
    zip_file = BytesIO(data.content)
    dfs = {}
    with zipfile.ZipFile(zip_file, 'r') as zf:
        for fileinfo in zf.infolist():
            content = zf.read(fileinfo).decode('ascii')
            df = pd.read_csv(StringIO(content))
            dfs[fileinfo.filename] = df

    assert set(dfs.keys()) == {
        'comparison_database.csv', 'contributors_public.csv', 'all_video_scores.csv'
    }, f"Wrong files in archive: {dfs.keys()}"

    # checking comparisons privacy
    df = dfs['comparison_database.csv']
    for _, row in df.iterrows():
        username = row['user__user__username']
        vid1 = row['video_1__video_id']
        vid2 = row['video_2__video_id']

        # both videos must be rated publicly!
        with django_db_blocker.unblock():
            # the rating user is the same for both videos: look it up once
            up = UserPreferences.objects.get(user__username=username)

            for vid in [vid1, vid2]:
                qs = Video.objects.filter(video_id=vid)
                assert qs.count() == 1, (qs, qs.count())

                qs = VideoRatingPrivacy._annotate_privacy(qs, prefix="videoratingprivacy",
                                                          field_user=up)

                assert qs.count() == 1, (qs, qs.count())
                assert qs.get()._is_public, qs.values()
        print("Check for", username, vid1, vid2, "successful")

    # checking user information privacy
    df = dfs['contributors_public.csv']
    for _, row in df.iterrows():
        username = row['user__username']

        with django_db_blocker.unblock():
            # checking certification status
            qs = UserInformation.objects.filter(user__username=username)
            assert qs.count() == 1, qs
            qs = UserInformation._annotate_is_certified(qs)
            assert qs.count() == 1, qs
            ui = qs.get()
            assert ui._is_certified == row['_is_certified'], (dict(row), ui)

            # checking show_my_profile
            if not ui.show_my_profile:
                for f in UserInformation.PROFILE_FIELDS:
                    assert pd.isna(row[f]), row[f]

            # checking online presence
            if not ui.show_online_presence or not ui.show_my_profile:
                for f in UserInformation.ONLINE_FIELDS:
                    assert pd.isna(row[f]), row[f]

            # checking that protected fields are not included
            for f in UserInformation.PROTECTED_FIELDS:
                assert f not in row, (f, row)

        print("Check for", username, "successful")
def accessible_model_filter(queryset, username):
    """Restrict the queryset to the entries accessible for search.

    An entry is kept when it is not a demo account and has at least one
    public video (as annotated by
    ``UserInformation._annotate_n_public_videos``), or when it belongs to
    the user with the given username.
    """
    annotated = UserInformation._annotate_n_public_videos(queryset)

    is_public_contributor = Q(is_demo=False) & Q(_n_public_videos__gte=1)
    is_requested_user = Q(user__username=username)

    return annotated.filter(is_public_contributor | is_requested_user)