def filter_popular(self, queryset, value):
    """
    Recommend content that is popular with all users.

    When fewer than 50 session logs exist, up to 25 randomly chosen non-topic
    nodes are returned instead. Otherwise the result for the active content
    database is cached for 10 minutes.

    :param queryset: all content nodes for this channel
    :param value: id of currently logged in user, or none if user is anonymous
    :return: 10 most popular content nodes
    """
    if ContentSessionLog.objects.count() < 50:
        # return 25 random content nodes if not enough session logs
        # The pks have to be materialised for sampling anyway, so take the
        # population size from the list: this avoids a separate COUNT query
        # (which scales with table size) and guarantees that the sample size
        # can never exceed the population, which would make sample() raise.
        pks = list(queryset.values_list('pk', flat=True).exclude(kind=content_kinds.TOPIC))
        count = min(len(pks), 25)
        return queryset.filter(pk__in=sample(pks, count))

    cache_key = 'popular_for_{}'.format(get_active_content_database())
    # Read the cache exactly once: with two separate reads the entry could
    # expire in between, and the second read would return None.
    cached_popular = cache.get(cache_key)
    if cached_popular:
        return cached_popular

    # get the most accessed content nodes
    content_counts_sorted = ContentSessionLog.objects \
        .filter(channel_id=get_active_content_database()) \
        .values_list('content_id', flat=True) \
        .annotate(Count('content_id')) \
        .order_by('-content_id__count')

    most_popular = queryset.filter(content_id__in=list(content_counts_sorted[:10]))

    # cache the popular results queryset for 10 minutes, for efficiency
    cache.set(cache_key, most_popular, 60 * 10)
    return most_popular
def filter_popular(self, queryset, value):
    """
    Recommend content that is popular with all users.

    When fewer than 50 session logs exist, up to 25 randomly chosen nodes
    (excluding those whose kind is 'topic' or empty) are returned instead.
    Otherwise the result for the active content database is cached for
    10 minutes.

    :param queryset: all content nodes for this channel
    :param value: id of currently logged in user, or none if user is anonymous
    :return: 10 most popular content nodes
    """
    if ContentSessionLog.objects.count() < 50:
        # return 25 random content nodes if not enough session logs
        # Evaluate the pks once and size the sample from the list, rather than
        # issuing a COUNT query and then fetching the same rows again.
        pks = list(queryset.values_list('pk', flat=True).exclude(kind__in=['topic', '']))
        count = min(len(pks), 25)
        return queryset.filter(pk__in=sample(pks, count))

    cache_key = 'popular_for_{}'.format(get_active_content_database())
    # Read the cache exactly once: with two separate reads the entry could
    # expire in between, and the second read would return None.
    cached_popular = cache.get(cache_key)
    if cached_popular:
        return cached_popular

    # get the most accessed content nodes
    content_counts_sorted = ContentSessionLog.objects \
        .filter(channel_id=get_active_content_database()) \
        .values_list('content_id', flat=True) \
        .annotate(Count('content_id')) \
        .order_by('-content_id__count')

    most_popular = queryset.filter(content_id__in=list(content_counts_sorted[:10]))

    # cache the popular results queryset for 10 minutes, for efficiency
    cache.set(cache_key, most_popular, 60 * 10)
    return most_popular
def filter_resume(self, queryset, value):
    """
    Recommend content that the user has recently engaged with, but not finished.

    :param queryset: all content nodes for this channel
    :param value: id of currently logged in user, or none if user is anonymous
    :return: 10 most recently viewed content nodes
    """
    # if user is anonymous, return no nodes
    if not value:
        return queryset.none()

    # get the most recently viewed, but not finished, content nodes
    # Order newest first: with an ascending sort the [:10] slice below would
    # pick the ten *least* recently viewed logs instead.
    content_ids = ContentSummaryLog.objects \
        .filter(user=value, channel_id=get_active_content_database()) \
        .exclude(progress=1) \
        .order_by('-end_timestamp') \
        .values_list('content_id', flat=True) \
        .distinct()

    # Evaluate only the slice that is needed; truth-testing the unsliced
    # queryset would fetch every matching row.
    recent_content_ids = list(content_ids[:10])

    # If no logs, don't bother doing the other queries
    if not recent_content_ids:
        return queryset.none()

    return queryset.filter(content_id__in=recent_content_ids)
def list(self, request, **kwargs):
    """
    Return the file count and total file size for one channel.

    The channel is taken from ``kwargs['channel_id']`` and the aggregate is
    computed while that content database is active.

    :param request: the incoming request (not inspected here)
    :return: a Response whose payload is a one-element list holding a dict
        with ``total_files``, ``total_file_size`` and ``channel_id``
    """
    channel_id = kwargs['channel_id']
    with using_content_database(channel_id):
        totals = models.File.objects.aggregate(
            total_files=Count('pk'),
            total_file_size=Sum('file_size'),
        )
        totals.update(channel_id=get_active_content_database())
        # The client fetches this as a Collection, which expects an array
        payload = [totals]
        return Response(payload)
def filter_by_content_ids(self, content_ids, content_id_lookup="content_id"):
    """
    Restrict a set of logs to those whose content id is in ``content_ids``.

    :param content_ids: list or queryset of content ids to match
    :param content_id_lookup: field lookup path that holds the content id
    :return: the filtered set of logs
    """
    in_lookup = content_id_lookup + "__in"

    if not default_database_is_attached():
        # the databases can't be joined, so hand over the ids as a plain list
        return self.filter(**{in_lookup: list(content_ids)})

    # the databases are attached: pass content_ids through unchanged so that
    # an efficient cross-database join can be used
    joined_logs = self.using(get_active_content_database())
    return joined_logs.filter(**{in_lookup: content_ids})
def ancestors(self, request, **kwargs):
    """
    Return the ``pk`` and ``title`` of every ancestor of the requested node.

    Results are cached for 10 minutes, keyed by the active content database
    and the ``pk`` keyword argument.

    :param request: the incoming request (not inspected here)
    :return: a Response wrapping a list of ``{'pk': ..., 'title': ...}`` dicts
    """
    cache_key = 'contentnode_ancestors_{db}_{pk}'.format(db=get_active_content_database(), pk=kwargs.get('pk'))

    # Read the cache exactly once: with two separate reads the entry could
    # expire in between and the response body would be None.
    cached_ancestors = cache.get(cache_key)
    if cached_ancestors is not None:
        return Response(cached_ancestors)

    ancestors = list(self.get_object().get_ancestors().values('pk', 'title'))

    cache.set(cache_key, ancestors, 60 * 10)

    return Response(ancestors)
def get_queryset(self):
    """
    Build the queryset of content nodes with recent activity.

    A summary log counts when it belongs to a user from the requested
    collection, ended at or after the cutoff time, and either has some
    progress or has a mastery log with at least one attempt. The cutoff comes
    from the ``last_active_time`` query parameter, falling back to 7 days
    before now, and is stored on ``self.kwargs`` in ISO format.

    :return: ContentNodes ordered by ``lft``, one per distinct content_id
    """
    mastery_logs_with_attempts = MasteryLog.objects.filter(attemptlogs__isnull=False)
    topic_node = ContentNode.objects.get(pk=self.kwargs['content_node_id'])

    # Use the cutoff supplied by the client, falling back to the last 7 days
    requested_cutoff = self.request.query_params.get('last_active_time')
    if requested_cutoff:
        cutoff = parse(requested_cutoff)
    else:
        cutoff = timezone.now() - datetime.timedelta(7)

    # The serializer reads this value back from the kwargs
    self.kwargs['last_active_time'] = cutoff.isoformat()

    summary_logs = ContentSummaryLog.objects
    if default_database_is_attached():
        # direct join between the content and default databases is possible
        summary_logs = summary_logs.using(get_active_content_database())

    logs_in_topic = summary_logs.filter_by_topic(topic_node)
    has_activity = Q(progress__gt=0) | Q(masterylogs__in=mastery_logs_with_attempts)
    collection_users = list(get_members_or_user(
        self.kwargs['collection_kind'],
        self.kwargs['collection_id'],
    ))
    recent_content_ids = logs_in_topic.filter(
        has_activity,
        user__in=collection_users,
        end_timestamp__gte=cutoff,
    ).values_list('content_id', flat=True)

    if not default_database_is_attached():
        # no cross-database join available, so pass the ids as a plain list
        recent_content_ids = list(recent_content_ids)

    # Collapse nodes that share a content_id down to the lowest pk each.
    # distinct('content_id') would be simpler, but DISTINCT ON is PostgreSQL only.
    matching_nodes = ContentNode.objects.filter(content_id__in=recent_content_ids)
    one_pk_per_content_id = matching_nodes \
        .values('content_id') \
        .order_by('lft') \
        .annotate(pk=Min('pk')) \
        .values_list('pk', flat=True)

    return ContentNode.objects.filter(pk__in=one_pk_per_content_id).order_by('lft')
def filter_resume(self, queryset, value):
    """
    Recommend content that the user has recently engaged with, but not finished.

    :param queryset: all content nodes for this channel
    :param value: id of currently logged in user, or none if user is anonymous
    :return: 10 most recently viewed content nodes
    """
    # if user is anonymous, return no nodes
    if not value:
        return queryset.none()

    # get the most recently viewed, but not finished, content nodes
    # Order newest first: with an ascending sort the [:10] slice below would
    # pick the ten *least* recently viewed logs instead.
    content_ids = ContentSummaryLog.objects \
        .filter(user=value, channel_id=get_active_content_database()) \
        .exclude(progress=1) \
        .order_by('-end_timestamp') \
        .values_list('content_id', flat=True) \
        .distinct()

    recent_content_ids = list(content_ids[:10])

    # With no unfinished logs there is nothing to resume, so skip the
    # content node query entirely.
    if not recent_content_ids:
        return queryset.none()

    return queryset.filter(content_id__in=recent_content_ids)
def get_progress_and_last_active(target_nodes, **kwargs):
    """
    Summarise progress and last activity for a set of content nodes.

    :param target_nodes: a ContentNode queryset, or a single ContentNode
    :param kwargs: must contain ``collection_kind`` and ``collection_id``
        (used to look up the users to report on); may contain
        ``last_active_time``, a timestamp string that restricts the summary
        logs to those ending at or after that time
    :return: a tuple ``(output_progress_dict, output_last_active_dict)``, both
        keyed by content_id. For a topic the progress value is a list with one
        dict per descendant content kind (``total_progress``, ``kind``,
        ``node_count``); for any other node it is a one-element list with
        ``total_progress``, ``log_count_total`` and ``log_count_complete``.
        The last active value is the most recent ``end_timestamp``, or None.
    """
    # Prepare dictionaries to output the progress and last active, keyed by content_id
    output_progress_dict = {}
    output_last_active_dict = {}

    # Get a list of all the users that we are querying
    users = list(get_members_or_user(kwargs['collection_kind'], kwargs['collection_id']))

    # Get a list of all content ids for all target nodes and their descendants
    content_ids = target_nodes.get_descendants(include_self=True).order_by().values_list("content_id", flat=True)

    # get all summary logs for the users that correspond to the content nodes and descendant content nodes
    if default_database_is_attached():
        # if possible, do a direct join between the content and default databases
        summary_logs = ContentSummaryLog.objects.using(get_active_content_database())
    else:
        # otherwise, convert the leaf queryset into a flat list of ids and use that
        summary_logs = ContentSummaryLog.objects
        content_ids = list(content_ids)

    # Filter by users and the content ids
    progress_query = summary_logs.filter(user__in=users, content_id__in=content_ids)

    # Conditionally filter by last active time.
    # QuerySet.filter returns a new queryset rather than modifying in place,
    # so the result has to be assigned back for the filter to take effect.
    last_active_time = kwargs.get('last_active_time')
    if last_active_time:
        progress_query = progress_query.filter(end_timestamp__gte=parse(last_active_time))

    # Get an annotated list of dicts of type:
    # {
    #   'content_id': <content_id>,
    #   'kind': <kind>,
    #   'total_progress': <sum of all progress for this content>,
    #   'log_count_total': <number of summary logs for this content>,
    #   'log_count_complete': <number of complete summary logs for this content>,
    #   'last_active': <most recent end_timestamp for this content>,
    # }
    progress_list = progress_query.values('content_id', 'kind').annotate(
        total_progress=Sum('progress'),
        log_count_total=Count('pk'),
        log_count_complete=Sum(Case(When(progress=1, then=1), default=0, output_field=IntegerField())),
        last_active=Max('end_timestamp'))

    # Evaluate query and make a lookup dict of all progress
    progress_dict = {item.get('content_id'): item for item in progress_list}

    if isinstance(target_nodes, ContentNode):
        # Have been passed an individual model
        target_nodes = [target_nodes]

    # Loop through each node to add progress and last active information to the output dicts
    for target_node in target_nodes:
        # In the case of a topic, we need to look at the progress and last active from each of its descendants
        if target_node.kind == content_kinds.TOPIC:
            # Get all the content_ids and kinds of each leaf node as a tuple
            # (about half the size of the dict from 'values' method)
            # Remove topics in generator comprehension, rather than using .exclude as kind is not indexed
            # Use set to remove repeated content
            leaf_nodes = set(
                node for node in
                target_node.get_descendants(include_self=False).order_by().values_list('content_id', 'kind')
                if node[1] != content_kinds.TOPIC)

            # Get a unique set of all non-topic content kinds
            leaf_kinds = sorted(set(leaf_node[1] for leaf_node in leaf_nodes))

            # Create a list of progress summary dicts for each content kind
            progress = [{
                # Sum total_progress across the progress dicts of every leaf
                # of this kind that has some progress recorded, starting from zero
                'total_progress': reduce(
                    sum_progress_dicts,
                    (progress_dict.get(leaf_node[0]) for leaf_node in leaf_nodes
                     if leaf_node[0] in progress_dict and leaf_node[1] == kind),
                    0.0,
                ),
                'kind': kind,
                # Count the number of leaf nodes of this particular kind
                'node_count': sum(1 for leaf_node in leaf_nodes if leaf_node[1] == kind),
            } for kind in leaf_kinds]

            # Set the output progress for this topic to this list of progress dicts
            output_progress_dict[target_node.content_id] = progress

            # Collect the last active times of the leaves that have progress recorded
            last_active_times = [
                progress_dict[leaf_node[0]]['last_active']
                for leaf_node in leaf_nodes
                if leaf_node[0] in progress_dict
            ]

            # Max does not handle empty iterables, so try this
            try:
                # If it is not empty, great!
                output_last_active_dict[target_node.content_id] = max(last_active_times)
            except (ValueError, TypeError):
                # If it is empty, catch the value error and set the last active time to None
                # If they are all none, catch the TypeError and also set to None
                output_last_active_dict[target_node.content_id] = None
        else:
            if target_node.content_id in progress_dict:
                progress = progress_dict.pop(target_node.content_id)
                output_last_active_dict[target_node.content_id] = progress.pop('last_active')
                # return as array for consistency in api
                output_progress_dict[target_node.content_id] = [{
                    'total_progress': progress['total_progress'],
                    'log_count_total': progress['log_count_total'],
                    'log_count_complete': progress['log_count_complete'],
                }]
            elif target_node.content_id not in output_progress_dict:
                # Not in the progress dict, but also not in our output, so supply default values
                output_last_active_dict[target_node.content_id] = None
                output_progress_dict[target_node.content_id] = [{
                    'total_progress': 0.0,
                    'log_count_total': 0,
                    'log_count_complete': 0,
                }]
    return output_progress_dict, output_last_active_dict