def get_top_raters(self, video):
    """Return the raters to display for ``video``, flagged by rating privacy.

    The base queryset is chosen as follows:

    * empty, when ``self.get_video_object(video)`` yields nothing;
    * the single user named by ``search_username_from_request`` when the
      request asks for a specific model;
    * ``video_obj.certified_top_raters()`` otherwise.

    The result is passed through ``VideoRatingPrivacy._annotate_privacy`` and
    then annotated with ``n_public_rating``: 1 when ``_is_public`` is true,
    0 otherwise.
    """
    video_obj = self.get_video_object(video)
    username = search_username_from_request(self.context.get("request", {}))

    if not video_obj:
        raters = UserInformation.objects.none()
    elif username:
        raters = UserInformation.objects.filter(user__username=username)
    else:
        raters = video_obj.certified_top_raters()

    # annotating with whether the rating of this video is public
    pref_privacy = 'user__userpreferences__videoratingprivacy'
    raters = VideoRatingPrivacy._annotate_privacy(
        qs=raters,
        prefix=pref_privacy,
        field_user=None,
        filter_add={f'{pref_privacy}__video': video_obj},
    )

    # integer version of the privacy flag
    public_as_int = Case(
        When(_is_public=True, then=Value(1)),
        default=Value(0),
        output_field=IntegerField(),
    )
    return raters.annotate(n_public_rating=public_as_int)
def handle(self, **options):
    """Create privacy objects for every rating that does not have one yet.

    For each ``UserPreferences`` row, finds the user's ``VideoRating`` entries
    whose video has no ``VideoRatingPrivacy`` by that same user, and builds a
    ``VideoRatingPrivacy`` with ``is_public=options['set_public']`` for each.
    The objects are only written when ``options['dry_run']`` is falsy; they
    are printed either way.
    """
    is_public = options['set_public']
    print(f"Setting all ambiguous rating privacys to PUBLIC={is_public}")

    for user in UserPreferences.objects.all():
        # ratings by this user whose video has no privacy setting by this user
        own_privacy = Q(video__videoratingprivacy__user=user)
        ambiguous = (
            VideoRating.objects.filter(user=user)
            .annotate(_n_privacys=Count('video__videoratingprivacy',
                                        distinct=True,
                                        filter=own_privacy))
            .filter(_n_privacys=0)
        )

        video_ids = list(ambiguous.values_list('video__video_id', flat=True))
        print(
            f"User {user} has the following ambiguous privacy settings: {video_ids}"
        )

        objs = []
        for vid in video_ids:
            video = Video.objects.get(video_id=vid)
            objs.append(
                VideoRatingPrivacy(user=user, video=video, is_public=is_public))

        if not options['dry_run']:
            VideoRatingPrivacy.objects.bulk_create(objs)

        print(f"Objects for {user}: {objs}")
def test_privacy(self):
    """Check ``_annotate_privacy`` against stored settings and default values.

    Without a stored ``VideoRatingPrivacy`` the annotation must equal
    ``default_value``; with one, it must equal the stored ``is_public``
    whatever the default is.
    """

    def annotated_flag(default_value):
        """Annotate all videos and return ``_is_public`` of the only one."""
        annotated = VideoRatingPrivacy._annotate_privacy(
            Video.objects.all(),
            prefix='videoratingprivacy',
            field_user=self.user_prefs,
            default_value=default_value)
        assert len(annotated) == 1
        return annotated[0]._is_public

    # (stored is_public or None for "no object", expected flag per default)
    scenarios = [
        (None, {True: True, False: False}),
        (True, {True: True, False: True}),
        (False, {True: False, False: False}),
    ]

    for stored, expected_for_default in scenarios:
        # every scenario starts from a clean table
        VideoRatingPrivacy.objects.all().delete()
        if stored is not None:
            VideoRatingPrivacy.objects.create(video=self.video,
                                              user=self.user_prefs,
                                              is_public=stored)

        for default_value in (True, False):
            assert annotated_flag(default_value) is expected_for_default[default_value]
def video_rating_statistics(self, request):
    """Get statistical data on video ratings.

    Every ``VideoRating`` passing ``self.filter_queryset`` is annotated with:

    * ``score``: computed by ``get_score_annotation`` from the requester's
      preference vector, updated from the query parameters;
    * ``_is_public``: the privacy flag from ``_annotate_privacy``;
    * ``_is_public_or_myself``: true for public ratings and for ratings whose
      username equals ``request.user.username``;
    * ``n_comparisons``: sum of the distinct ``expertrating_video_1`` and
      ``expertrating_video_2`` counts for the rating's user;
    * ``public_username``: the username when ``_is_public_or_myself`` is
      true, ``None`` otherwise.

    The result is ordered by primary key and paginated when a paginator is
    configured.
    """
    ratings = self.filter_queryset(VideoRating.objects.all())

    # total score given the preferences
    prefs_vector = update_preferences_vector_from_request(
        get_user_preferences(self.request).features_as_vector_centered,
        self.request.query_params)
    ratings = ratings.annotate(score=get_score_annotation(prefs_vector))

    # public/private rating
    ratings = VideoRatingPrivacy._annotate_privacy(
        ratings,
        prefix='video__videoratingprivacy',
        field_user=F('user'),
        default_value=None,
        annotate_bool=True,
        annotate_n=False)

    # visible when either public, or rated by the requester
    visible = Case(
        When(_is_public=True, then=Value(True)),
        When(user__user__username=request.user.username, then=Value(True)),
        default=Value(False),
        output_field=BooleanField(),
    )

    # pairwise comparisons of this video by this user, one count per slot
    as_first = Count('video__expertrating_video_1',
                     distinct=True,
                     filter=Q(video__expertrating_video_1__user=F('user')))
    as_second = Count('video__expertrating_video_2',
                      distinct=True,
                      filter=Q(video__expertrating_video_2__user=F('user')))

    # username for visible ratings, None for the rest
    username_if_visible = Case(
        When(_is_public_or_myself=True, then=F('user__user__username')),
        default=Value(None),
        output_field=CharField())

    ratings = (
        ratings
        .annotate(_is_public_or_myself=visible)
        .annotate(n_cmp_1=as_first)
        .annotate(n_cmp_2=as_second)
        .annotate(n_comparisons=F('n_cmp_1') + F('n_cmp_2'))
        .annotate(public_username=username_if_visible)
        .order_by('pk')  # deterministic ordering
    )

    page = self.paginate_queryset(ratings)
    if page is None:
        return Response(
            VideoRatingsStatisticsSerializerV2(ratings, many=True).data)

    serializer = VideoRatingsStatisticsSerializerV2(page, many=True)
    return self.get_paginated_response(serializer.data)
def search_username_from_request(request):
    """Get the username to use the scores from.

    Reads the ``search_model`` query parameter and checks that the requester
    is allowed to use that user's model.

    Args:
        request: the incoming request. Anything without a dict-like
            ``query_params`` attribute (e.g. a placeholder ``{}``) is
            accepted and treated as "no model requested".

    Returns:
        The requested username, or ``False`` when no model is requested.

    Raises:
        PermissionDenied: when the requested user is someone else and has no
            publicly rated video.
    """
    query_params = getattr(request, 'query_params', None)
    if not isinstance(query_params, dict):
        return False

    username = query_params.get('search_model', None)
    if not username:
        return False

    # Searching with one's own model is always allowed, so the privacy
    # lookup below cannot change the outcome: skip the database entirely.
    if username == request.user.username:
        return username

    public_ratings = VideoRatingPrivacy._annotate_privacy(
        qs=VideoRating.objects.filter(user__user__username=username)
    ).filter(_is_public=True)

    # Only existence matters here; exists() avoids counting every row.
    if not public_ratings.exists():
        raise PermissionDenied()

    return username
def set_all_rating_privacy(self, request):
    """Set all video rating privacy.

    Makes sure a ``VideoRatingPrivacy`` exists for every video appearing in
    one of the requester's expert ratings, then sets ``is_public`` on all of
    the requester's privacy objects. ``is_public`` is true unless the
    ``is_public`` query parameter is present and differs from ``'true'``.
    Responds with status 201.
    """
    prefs = get_object_or_404(UserPreferences,
                              user__username=request.user.username)

    # videos rated by the user, in either slot of a comparison
    in_first_slot = Q(expertrating_video_1__user=prefs)
    in_second_slot = Q(expertrating_video_2__user=prefs)
    rated_videos = Video.objects.filter(in_first_slot | in_second_slot).distinct()

    # creating privacy objects if they don't exist...
    if rated_videos:
        placeholders = [
            VideoRatingPrivacy(user=prefs, video=video) for video in rated_videos
        ]
        VideoRatingPrivacy.objects.bulk_create(placeholders,
                                               ignore_conflicts=True)

    requested = request.query_params.get('is_public', 'true')
    VideoRatingPrivacy.objects.filter(user=prefs).update(
        is_public=(requested == 'true'))

    # updating video properties
    update_user_username(prefs.user.username)

    return Response({'status': 'success'}, status=201)
def get_public_append_only_database_as_pd():
    """Get the public append-only database.

    Returns a dict of pandas DataFrames with three entries:

    * ``all_video_scores``: every video with its score under the default
      preferences and all criteria;
    * ``comparison_database``: the rating history, restricted to entries
      where both videos are rated publicly;
    * ``contributors_public``: non-demo users, with columns limited by their
      profile visibility settings.
    """
    # HACK: a horrible hack to make django-pandas work with annotations
    # see https://github.com/chrisdev/django-pandas/blob/master/django_pandas/io.py
    # see https://github.com/chrisdev/django-pandas/issues/124
    # TODO: fix it
    import django
    django.db.models.fields.FieldDoesNotExist = django.core.exceptions.FieldDoesNotExist

    result_df = {}

    # --- all videos with the tournesol score and all criteria
    default_features = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]
    scored_videos = Video.objects.all().annotate(
        score=get_score_annotation(default_features))
    result_df['all_video_scores'] = read_frame(
        scored_videos,
        fieldnames=['id', 'video_id', 'score'] + VIDEO_FIELDS)

    # --- all history for ratings, with both videos rated publicly
    history = HistoricalExpertRating.objects.all()
    for v in ('1', '2'):
        history = VideoRatingPrivacy._annotate_privacy(
            history,
            prefix=f'video_{v}__videoratingprivacy',
            output_prefix=f"_v{v}")
    history = history.filter(_v1_is_public=True, _v2_is_public=True)

    comparison_columns = ['id', 'duration_ms', 'datetime_lastedit', 'datetime_add']
    comparison_columns += VIDEO_FIELDS
    comparison_columns += [x + '_weight' for x in VIDEO_FIELDS]
    comparison_columns += [
        'user__user__username', 'video_1__video_id', 'video_2__video_id',
        'history_id', 'history_date', 'history_change_reason', 'history_type',
    ]
    result_df['comparison_database'] = read_frame(
        history, fieldnames=comparison_columns)

    # --- all user data (without demo accounts), with the _is_certified field
    contributors = UserInformation._annotate_is_certified(
        UserInformation.objects.all().filter(is_demo=False))

    # Even if 'show my profile' is false, export 'username'.
    # If 'show my profile' is true, export 'First name',
    # 'Last name', 'Title', 'Bio',
    # If 'show online presence' is true, export 'Website',
    # 'Linkedin', 'Youtube', 'Google scholar', 'Orcid', 'Researchgate', 'Twitter'.
    # Do NOT share demographic data.
    basic_columns = UserInformation.BASIC_FIELDS + ['_is_certified']
    profile_columns = basic_columns + UserInformation.PROFILE_FIELDS
    online_columns = profile_columns + UserInformation.ONLINE_FIELDS

    visibility_tiers = [
        # only username
        ({'show_my_profile': False}, basic_columns),
        # username and info
        ({'show_my_profile': True, 'show_online_presence': False},
         profile_columns),
        # username, info and online fields
        ({'show_my_profile': True, 'show_online_presence': True},
         online_columns),
    ]
    tier_frames = [
        read_frame(contributors.filter(**criteria), fieldnames=columns)
        for criteria, columns in visibility_tiers
    ]

    # all contributors
    result_df['contributors_public'] = pd.concat(
        tier_frames, axis=0, ignore_index=True)

    return result_df
def test_download_privacy_public_database(driver, django_db_blocker):
    """Test that public database is a zip archive, and it only contains public info."""
    create_toy_data(django_db_blocker=django_db_blocker, driver=driver,
                    n_users=30, n_videos=100, n_ratings=30)
    open_tournesol(driver)

    # locating the download link and fetching the archive
    link_element_id = 'id_public_database_download'
    WebDriverWait(driver, TIME_WAIT).until(
        EC.presence_of_element_located((By.ID, link_element_id)))
    link = driver.find_element_by_id(link_element_id).get_attribute('href')

    data = get(link)
    assert data.ok
    assert data.content
    assert data.headers['content-type'] == 'application/zip'

    # reading dataframes, one per archive member
    dfs = {}
    with zipfile.ZipFile(BytesIO(data.content), 'r') as zf:
        for fileinfo in zf.infolist():
            csv_text = zf.read(fileinfo).decode('ascii')
            dfs[fileinfo.filename] = pd.read_csv(StringIO(csv_text))

    expected_files = {
        'comparison_database.csv',
        'contributors_public.csv',
        'all_video_scores.csv',
    }
    assert set(dfs.keys()) == expected_files, f"Wrong files in archive: {dfs.keys()}"

    # checking comparisons privacy
    for _, row in dfs['comparison_database.csv'].iterrows():
        username = row['user__user__username']
        vid1 = row['video_1__video_id']
        vid2 = row['video_2__video_id']

        # both videos must be rated publicly!
        with django_db_blocker.unblock():
            for vid in (vid1, vid2):
                qs = Video.objects.filter(video_id=vid)
                assert qs.count() == 1, (qs, qs.count())

                up = UserPreferences.objects.get(user__username=username)
                qs = VideoRatingPrivacy._annotate_privacy(
                    qs, prefix="videoratingprivacy", field_user=up)
                assert qs.count() == 1, (qs, qs.count())
                assert qs.get()._is_public, qs.values()

            print("Check for", username, vid1, vid2, "successful")

    # checking user information privacy
    for _, row in dfs['contributors_public.csv'].iterrows():
        username = row['user__username']

        with django_db_blocker.unblock():
            # checking certification status
            qs = UserInformation.objects.filter(user__username=username)
            assert qs.count() == 1, qs
            qs = UserInformation._annotate_is_certified(qs)
            assert qs.count() == 1, qs
            ui = qs.get()
            assert ui._is_certified == row['_is_certified'], (dict(row), ui)

            # checking show_my_profile: hidden profiles export no profile data
            if not ui.show_my_profile:
                for f in UserInformation.PROFILE_FIELDS:
                    assert pd.isna(row[f]), row[f]

            # checking online presence: needs both switches to be exported
            if not ui.show_online_presence or not ui.show_my_profile:
                for f in UserInformation.ONLINE_FIELDS:
                    assert pd.isna(row[f]), row[f]

            # checking that protected fields are not included
            for f in UserInformation.PROTECTED_FIELDS:
                assert f not in row, (f, row)

        print("Check for", username, "successful")
def get_queryset(self, pk=None):
    """Return listed videos annotated with the scores used for search.

    Starts from all videos with ``is_unlisted=False`` and annotates:

    * ``score_preferences_term``: score for the features taken from the
      request;
    * ``tournesol_score``: score for the default preferences;
    * ``score_search_term``: text-search score times
      ``VideoSearchEngine.VIDEO_SEARCH_COEFF`` (0 when there is no ``search``
      query parameter);
    * ``score``: sum of the preferences and search terms.

    When ``self.need_scores_for_username()`` returns a username, the criteria
    values are taken from that user's ratings instead of the video itself,
    the per-video expert statistics are replaced by single-user values, and
    the model usage is logged for authenticated viewers. Other people's
    models only ever expose publicly rated videos; for one's own model this
    restriction applies only when ``show_all_my_videos=false`` is passed.

    Args:
        pk: unused in this method.
    """
    queryset = Video.objects.filter(is_unlisted=False).values()
    request = self.request

    # model fields without the criteria (those are annotated separately
    # when a single-user model is requested)
    fields = [x.name for x in Video._meta.fields]
    for f in VIDEO_FIELDS:
        fields.remove(f)

    def get_score_annotation(user_preferences_vector):
        """Returns an sql object annotating queries with the video ratings (scalar product)."""
        return sum(
            [F(f) * v for f, v in zip(VIDEO_FIELDS, user_preferences_vector)])

    features = self.get_features_from_request()
    default_features = [constants['DEFAULT_PREFS_VAL'] for _ in VIDEO_FIELDS]
    search_username = self.need_scores_for_username()

    # computing score inside the database
    if search_username:
        # computed properties are re-created as annotations below
        fields_exclude = set(Video.COMPUTED_PROPERTIES)
        fields = [f for f in fields if f not in fields_exclude]

        queryset = queryset.values(*fields)
        queryset = queryset.annotate(
            **{key: F(f'videorating__{key}') for key in VIDEO_FIELDS},
            user=F('videorating__user__user__username'),
        ).filter(user=search_username)

        if search_username == request.user.username:
            # for myself, allow showing public/non-public videos
            only_public = request.query_params.get(
                'show_all_my_videos', 'true') == 'false'
        else:
            # for other people, only show public videos
            only_public = True

        # keeping only public videos
        if only_public:
            queryset = VideoRatingPrivacy._annotate_privacy(
                queryset,
                prefix='videoratingprivacy',
                field_user=None,
                filter_add={
                    'videoratingprivacy__user__user__username': search_username
                })
            queryset = queryset.filter(_is_public=True)

        # single-user model: exactly one expert, counted as public
        queryset = queryset.annotate(rating_n_experts=Value(1, IntegerField()))

        q1 = Q(expertrating_video_1__user__user__username=search_username)
        q2 = Q(expertrating_video_2__user__user__username=search_username)
        c1 = Count('expertrating_video_1', q1, distinct=True)
        c2 = Count('expertrating_video_2', q2, distinct=True)
        queryset = queryset.annotate(rating_n_ratings=c1 + c2)

        queryset = queryset.annotate(n_public_experts=Value(1, IntegerField()))
        queryset = queryset.annotate(n_private_experts=Value(0, IntegerField()))

        # TODO: a hack. improve this
        queryset = queryset.annotate(public_experts=Value("", CharField()))

        # logging model usage in search
        if request.user.is_authenticated:
            RepresentativeModelUsage.objects.get_or_create(
                viewer=UserPreferences.objects.get(
                    user__username=request.user.username),
                model=UserPreferences.objects.get(
                    user__username=search_username))

    queryset = queryset.annotate(
        score_preferences_term=get_score_annotation(features))
    queryset = queryset.annotate(
        tournesol_score=get_score_annotation(default_features))

    # default search term, overridden below when a search is requested
    queryset = queryset.annotate(score_search_term_=Value(0.0, FloatField()))

    if request.query_params.get('search'):
        if connection.vendor.startswith('postgres'):
            # computing the postgres score for search
            s_query = request.query_params.get('search', '')

            def word_to_query(w):
                """Convert one word into a query (exact word OR prefix match)."""
                return (SearchQuery(w, search_type='raw')
                        | SearchQuery(w + ':*', search_type='raw'))

            def words_to_query(s_query, max_len=100, max_word_len=20):
                """Convert a string with words into a SearchQuery."""
                words = s_query[:max_len].split(' ')
                # raw queries are built from the words, so only keep
                # alphanumeric characters
                words = [''.join(filter(str.isalnum, x)) for x in words]
                words = [x for x in words if 1 <= len(x) <= max_word_len]
                if not words:
                    return SearchQuery('')
                # all words must match
                return reduce(lambda x, y: x & y,
                              [word_to_query(x) for x in words])

            s_query = words_to_query(s_query)

            s_vectors = [
                SearchVector(f, weight=w)
                for f, w in zip(self.search_fields, self.search_weights)
            ]
            s_vector = reduce(lambda x, y: x + y, s_vectors)

            queryset = queryset.annotate(
                score_search_term_=SearchRank(s_vector, s_query))
        else:
            # in other databases, using basic filtering
            queryset = filters_.SearchFilter().filter_queryset(
                request, queryset, self)
            queryset = queryset.annotate(
                score_search_term_=Value(1.0, FloatField()))

    queryset = queryset.annotate(
        score_search_term=F('score_search_term_')
        * VideoSearchEngine.VIDEO_SEARCH_COEFF)
    queryset = queryset.annotate(
        score=F('score_preferences_term') + F('score_search_term'))

    return queryset