def setup(self, db):
        """Create the site, users, courses and metrics records for the tests.

        Three users and three course overviews are dated 60 days before
        ``self.date_for``. One CourseDailyMetrics record is created for each
        entry in ``CDM_INPUT_TEST_DATA`` and one SiteDailyMetrics record is
        created for the day before ``self.date_for``. In multisite mode the
        courses (and, when organizations support sites, the users) are linked
        to an organization that owns ``self.site``.
        """
        self.date_for = datetime.date(2018, 10, 1)
        self.site = Site.objects.first()

        # Users and courses all predate the metrics date by the same margin
        sixty_days_before = self.date_for - datetime.timedelta(days=60)
        self.users = [
            UserFactory(date_joined=as_datetime(sixty_days_before))
            for _ in range(3)
        ]
        self.course_overviews = [
            CourseOverviewFactory(created=as_datetime(sixty_days_before))
            for _ in range(3)
        ]
        self.cdm_recs = [
            CourseDailyMetricsFactory(
                site=self.site, date_for=self.date_for, **cdm_data)
            for cdm_data in CDM_INPUT_TEST_DATA
        ]
        self.prev_day_sdm = SiteDailyMetricsFactory(
            site=self.site,
            date_for=prev_day(self.date_for),
            **SDM_DATA[1])

        if not is_multisite():
            return

        # Multisite only: tie the courses (and possibly users) to the site's
        # organization
        self.organization = OrganizationFactory(sites=[self.site])
        for course_overview in self.course_overviews:
            OrganizationCourseFactory(organization=self.organization,
                                      course_id=str(course_overview.id))
        if organizations_support_sites():
            for user in self.users:
                UserOrganizationMappingFactory(
                    user=user, organization=self.organization)
Пример #2
0
    def extract(self, site, date_for=None, **kwargs):  # pylint: disable=unused-argument
        '''
        Gather the site-wide metrics for a single day.

        User counts come from the User model rather than from enrollments
        since there can be registered users who have not enrolled.

        When ``date_for`` is not given, yesterday (UTC) is used. Returns a
        dict with the keys ``todays_active_user_count``,
        ``cumulative_active_user_count``, ``total_user_count``,
        ``course_count``, ``total_enrollment_count`` and ``mau``.

        TODO: Exclude non-students from the user count
        '''
        if not date_for:
            utc_today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
            date_for = prev_day(utc_today)

        # Users and courses are counted when created before the end of date_for
        total_users = get_users_for_site(site).filter(
            date_joined__lt=as_datetime(next_day(date_for))).count()
        total_courses = get_courses_for_site(site).filter(
            created__lt=as_datetime(next_day(date_for))).count()

        active_today = get_site_active_users_for_date(site, date_for).count()
        monthly_active_users = site_mau_1g_for_month_as_of_day(site, date_for)
        previous_cumulative = get_previous_cumulative_active_user_count(
            site, date_for)

        return dict(
            todays_active_user_count=active_today,
            cumulative_active_user_count=previous_cumulative + active_today,
            total_user_count=total_users,
            course_count=total_courses,
            total_enrollment_count=get_total_enrollment_count(site, date_for),
            mau=monthly_active_users.count(),
        )
Пример #3
0
 def calc_from_user_model():
     """Count the distinct site users who joined within the date range.

     ``site``, ``start_date`` and ``end_date`` are free variables that are not
     defined in this function. Both bounds are exclusive: users must have
     joined after the day before ``start_date`` and before the day after
     ``end_date``.
     """
     site_users = figures.sites.get_users_for_site(site)
     joined_in_range = site_users.filter(
         date_joined__gt=as_datetime(prev_day(start_date)),
         date_joined__lt=as_datetime(next_day(end_date)),
     )
     return joined_in_range.values('id').distinct().count()
Пример #4
0
def seed_course_overviews(data=None):
    """Create or update CourseOverview records from seed data.

    When ``data`` is empty or omitted, the canned
    ``cans.COURSE_OVERVIEW_DATA`` records are used, followed by 20 generated
    course overviews (org 'FOO') so that pagination can be tested.

    Each record is a mapping with the keys ``id``, ``display_name``, ``org``,
    ``number``, ``created``, ``enrollment_start`` and ``enrollment_end``.
    Records are matched on the course key, so re-seeding updates existing
    rows instead of duplicating them.
    """
    if not data:
        # append with randomly generated course overviews to test pagination
        #
        # A new list is built here on purpose: extending with ``+=`` would
        # modify the shared ``cans.COURSE_OVERVIEW_DATA`` list in place, so
        # every call without ``data`` would grow the canned data.
        new_courses = [
            generate_course_overview(i, org='FOO') for i in range(20)
        ]
        data = list(cans.COURSE_OVERVIEW_DATA) + new_courses

    for rec in data:
        course_id = rec['id']
        # NOTE(review): ``start``/``end`` are seeded from the enrollment
        # start/end values - confirm this is intended
        defaults = dict(
            display_name=rec['display_name'],
            org=rec['org'],
            display_org_with_default=rec['org'],
            number=rec['number'],
            created=as_datetime(rec['created']).replace(tzinfo=utc),
            start=as_datetime(rec['enrollment_start']).replace(tzinfo=utc),
            end=as_datetime(rec['enrollment_end']).replace(tzinfo=utc),
            enrollment_start=as_datetime(
                rec['enrollment_start']).replace(tzinfo=utc),
            enrollment_end=as_datetime(
                rec['enrollment_end']).replace(tzinfo=utc),
        )
        # The 'version' default is only set for releases other than ginkgo
        if RELEASE_LINE != 'ginkgo':
            defaults['version'] = CourseOverview.VERSION
        CourseOverview.objects.update_or_create(
            id=as_course_key(course_id),
            defaults=defaults,
        )
Пример #5
0
    def extract(self, date_for=None, **kwargs):
        '''
        Gather the daily metrics for ``date_for``.

        User counts come from the User model rather than from enrollments
        since there can be registered users who have not enrolled.

        When ``date_for`` is not given, yesterday (UTC) is used. Returns a
        dict with the keys ``todays_active_user_count``,
        ``cumulative_active_user_count``, ``total_user_count``,
        ``course_count`` and ``total_enrollment_count``.

        TODO: Exclude non-students from the user count
        '''
        if not date_for:
            utc_today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
            date_for = prev_day(utc_today)

        # Users and courses are counted when created before the end of date_for
        total_users = get_user_model().objects.filter(
            date_joined__lt=as_datetime(next_day(date_for))).count()
        total_courses = CourseOverview.objects.filter(
            created__lt=as_datetime(next_day(date_for))).count()

        active_today = get_active_user_count_for_date(date_for)
        previous_cumulative = get_previous_cumulative_active_user_count(
            date_for)

        return dict(
            todays_active_user_count=active_today,
            cumulative_active_user_count=previous_cumulative + active_today,
            total_user_count=total_users,
            course_count=total_courses,
            total_enrollment_count=get_total_enrollment_count(date_for),
        )
Пример #6
0
def get_active_users_for_time_period(site,
                                     start_date,
                                     end_date,
                                     course_ids=None):
    """
    Returns the number of users active in the time period.

    This is determined by counting the distinct student ids of StudentModule
    records modified in the time period, restricted to the users of ``site``
    and, when ``course_ids`` is given, to those courses.

    The period is bounded with exclusive ``modified__gt`` / ``modified__lt``
    lookups on the day before ``start_date`` and the day after ``end_date``.
    A ``modified__range=(as_date(start_date), as_date(end_date))`` lookup is
    not used because it raises timezone warnings.
    """
    # Get list of learners for the site
    user_ids = figures.sites.get_user_ids_for_site(site)
    filter_args = dict(
        modified__gt=as_datetime(prev_day(start_date)),
        modified__lt=as_datetime(next_day(end_date)),
        student_id__in=user_ids,
    )
    if course_ids:
        # The lookup has to name StudentModule's ``course_id`` field;
        # ``course_ids`` is not a field on the model
        filter_args['course_id__in'] = course_ids

    return StudentModule.objects.filter(
        **filter_args).values('student__id').distinct().count()
Пример #7
0
    def setup(self, db, settings):
        """Create two courses, three enrollments and one StudentModule.

        ``self.date_for`` is yesterday (UTC). The single StudentModule belongs
        to the first enrollment and is created and modified on
        ``self.date_for``. When organizations support sites, multisite mode is
        switched on and the courses and enrolled users are mapped to an
        organization owning ``self.site``; otherwise a standalone
        organization is created.
        """
        # Set up data that's the same for standalone or multisite
        self.date_for = utc_yesterday()
        self.site = Site.objects.first()
        self.courses = [CourseOverviewFactory() for _ in range(2)]

        # Two for "our" course, one for another course in the same site
        self.enrollments = [
            CourseEnrollmentFactory(course_id=self.courses[course_index].id)
            for course_index in (0, 0, 1)
        ]

        activity_time = as_datetime(self.date_for)
        self.ce0_sm = StudentModuleFactory.from_course_enrollment(
            self.enrollments[0],
            created=activity_time,
            modified=activity_time)

        # Handle site mode specifics
        if not organizations_support_sites():
            self.org = OrganizationFactory()
            return

        settings.FEATURES['FIGURES_IS_MULTISITE'] = True
        self.org = OrganizationFactory(sites=[self.site])
        for course in self.courses:
            OrganizationCourseFactory(organization=self.org,
                                      course_id=str(course.id))
        enrolled_users = [enrollment.user for enrollment in self.enrollments]
        map_users_to_org(self.org, enrolled_users)

        # For our tests, we focus on a single enrollment. We should not
        # need to stand up other site data, but if we find we do need to,
        # then here's the place to do it
Пример #8
0
def seed_student_modules_fixed(data=None):
    '''
    Create StudentModule records from a fixed data set.

    When ``data`` is empty or omitted, ``STUDENT_MODULE_DATA`` is used. Each
    record is a mapping with the keys ``username``, ``course_id``,
    ``created`` and ``modified``. The user named by ``username`` must already
    exist, since it is fetched with ``get``.
    '''
    if not data:
        data = STUDENT_MODULE_DATA
    for rec in data:
        StudentModule.objects.update_or_create(
            student=get_user_model().objects.get(username=rec['username']),
            course_id=as_course_key(rec['course_id']),
            # The model field is ``created``; ``create`` is not a field name
            created=as_datetime(rec['created']),
            modified=as_datetime(rec['modified']),
        )
Пример #9
0
 def setup(self, db):
     """Create the users, courses and metrics records for the tests.

     Three users and three course overviews are dated 60 days before
     ``self.date_for``. One CourseDailyMetrics record is created for each
     entry in ``CDM_INPUT_TEST_DATA`` and one SiteDailyMetrics record is
     created for the day before ``self.date_for``.
     """
     self.date_for = datetime.date(2018, 10, 1)

     # Users and courses all predate the metrics date by the same margin
     sixty_days_before = self.date_for - datetime.timedelta(days=60)
     self.users = [
         UserFactory(date_joined=as_datetime(sixty_days_before))
         for _ in range(3)
     ]
     self.course_overviews = [
         CourseOverviewFactory(created=as_datetime(sixty_days_before))
         for _ in range(3)
     ]
     self.cdm_recs = [
         CourseDailyMetricsFactory(date_for=self.date_for, **cdm_data)
         for cdm_data in CDM_INPUT_TEST_DATA
     ]
     self.prev_day_sdm = SiteDailyMetricsFactory(
         date_for=prev_day(self.date_for), **SDM_PREV_DAY[1])
Пример #10
0
def seed_users(data=None):
    """Create users, and their profiles when given, from seed records.

    When ``data`` is empty or omitted, ``cans.USER_DATA`` is used. Each
    record supplies ``username``, ``password`` and ``email``, and may supply
    ``is_staff``, ``is_superuser`` and a ``profile`` mapping (``fullname``
    plus optional ``gender`` and ``country``). Each user gets a fake join
    date between ``DAYS_BACK`` days before ``LAST_DAY`` and ``LAST_DAY``.

    A record that raises ``IntegrityError`` is reported and skipped.

    Returns the list of users that were created.
    """
    if not data:
        data = cans.USER_DATA

    earliest_join = days_from(LAST_DAY, DAYS_BACK * -1)
    seeded = []
    for rec in data:
        try:
            profile = rec.get('profile')
            new_user = get_user_model().objects.create_user(
                username=rec['username'],
                password=rec['password'],
                email=rec['email'],
            )
            new_user.is_staff = rec.get('is_staff', False)
            new_user.is_superuser = rec.get('is_superuser', False)
            joined_on = FAKE.date_between(earliest_join, LAST_DAY)
            new_user.date_joined = as_datetime(joined_on).replace(tzinfo=utc)
            new_user.save()
            # The user is recorded before the profile is attempted
            seeded.append(new_user)
            if profile:
                UserProfile.objects.create(
                    user=new_user,
                    name=profile['fullname'],
                    gender=profile.get('gender'),
                    country=profile.get('country'),
                )
        except IntegrityError as e:
            print('skipping duplicate user email {}'.format(e))
    return seeded
Пример #11
0
    def load(self, date_for=None, force_update=False, **_kwargs):
        """
        Get or build the CourseDailyMetrics record for ``date_for``.

        When ``date_for`` is not given, yesterday (UTC) is used. Returns a
        ``(record, created)`` pair.

        If the record already exists and force_update is False, then simply
        return the record with the 'created' flag set to False. This saves us
        an unnecessary call to extract data.

        TODO: clean up how we do this. We want to be able to call the loader
        with an existing data set (not having to call the extractor) but we
        need to make sure that the metrics row 'date_for' is the same as
        provided in the data. So before hacking something together, I want to
        think this over some more.

        Raises ValidationError if invalid data is attempted to be saved to the
        course daily metrics model instance
        """
        if date_for:
            date_for = as_datetime(date_for).replace(tzinfo=utc)
        else:
            utc_today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
            date_for = prev_day(utc_today)

        try:
            existing = CourseDailyMetrics.objects.get(
                course_id=self.course_id, date_for=date_for)
        except CourseDailyMetrics.DoesNotExist:
            # record not found, move on to creating
            existing = None

        # record found, only update if force update flag is True
        if existing is not None and not force_update:
            return (existing, False)

        fresh_data = self.get_data(date_for=date_for)
        return self.save_metrics(date_for=date_for, data=fresh_data)
    def test_extract(self, monkeypatch):
        """Check the values SiteDailyMetricsExtractor.extract returns.

        ``get_student_modules_for_site`` is patched with a mock that creates
        two users with two StudentModule records each, all modified on
        ``self.date_for``.
        """
        expected_results = dict(
            cumulative_active_user_count=52,  # previous cumulative is 50
            todays_active_user_count=2,
            total_user_count=len(self.users),
            course_count=len(CDM_INPUT_TEST_DATA),
            total_enrollment_count=150,
        )

        assert not StudentModule.objects.count()
        modified = as_datetime(self.date_for)

        def mock_student_modules_for_site(site):
            users = [UserFactory() for i in range(2)]
            for user in users:
                StudentModuleFactory(student=user, modified=modified)
                StudentModuleFactory(student=user, modified=modified)
            return StudentModule.objects.filter(student__in=users)

        monkeypatch.setattr(pipeline_sdm, 'get_student_modules_for_site',
                            mock_student_modules_for_site)

        # Sanity check: the fixture courses and users predate date_for
        for course in figures.sites.get_courses_for_site(self.site):
            assert course.created.date() < self.date_for
        for user in figures.sites.get_users_for_site(self.site):
            assert user.date_joined.date() < self.date_for

        actual = pipeline_sdm.SiteDailyMetricsExtractor().extract(
            site=self.site, date_for=self.date_for)

        # ``items()`` is available on Python 2 and 3; ``iteritems()`` only
        # exists on Python 2
        for key, value in expected_results.items():
            assert actual[key] == value, 'failed on key: "{}"'.format(key)
Пример #13
0
def get_active_users_for_time_period(start_date,
                                     end_date,
                                     site=None,
                                     course_ids=None):
    """
    Returns the number of users active in the time period.

    This is determined by counting the distinct student ids of StudentModule
    records modified in the time period and, when ``course_ids`` is given,
    belonging to those courses.

    Both bounds are exclusive: records must be modified after the day before
    ``start_date`` and before the day after ``end_date``.

    ``site`` is accepted but is not used by this function.
    """
    # Both bounds test ``modified`` so that records created before the period
    # but modified inside it are counted as activity
    filter_args = dict(modified__gt=as_datetime(prev_day(start_date)),
                       modified__lt=as_datetime(next_day(end_date)))
    if course_ids:
        # The lookup has to name StudentModule's ``course_id`` field;
        # ``course_ids`` is not a field on the model
        filter_args['course_id__in'] = course_ids

    return StudentModule.objects.filter(
        **filter_args).values('student__id').distinct().count()
Пример #14
0
def get_course_enrollments(course_id, date_for):
    """Convenience method to get a filtered queryset of CourseEnrollment objects

    The queryset holds the enrollments for ``course_id`` that were created
    before the day after ``date_for``.
    """
    course_key = as_course_key(course_id)
    created_before = as_datetime(next_day(date_for))
    return CourseEnrollment.objects.filter(course_id=course_key,
                                           created__lt=created_before)
Пример #15
0
 def test_get_course_enrollments_for_course(self):
     """get_course_enrollments returns the same ids as a direct query."""
     course_id = self.course_overviews[0].id
     created_before = as_datetime(next_day(self.today))

     expected_ids = CourseEnrollment.objects.filter(
         course_id=course_id,
         created__lt=created_before,
     ).values_list('id', flat=True)
     found = pipeline_cdm.get_course_enrollments(course_id=course_id,
                                                 date_for=self.today)
     found_ids = found.values_list('id', flat=True)

     assert set(found_ids) == set(expected_ids)
Пример #16
0
def get_active_learner_ids_today(course_id, date_for):
    """Get unique user ids for learners who are active today for the given
    course and date

    A learner counts as active when one of their StudentModule records for
    the course was modified at any time on ``date_for``. The match is made on
    the year, month and day parts of ``modified``: an equality test against
    ``as_datetime(date_for)`` would only match records modified at that exact
    instant.

    Returns a queryset of distinct student ids.
    """
    day = as_datetime(date_for)
    return StudentModule.objects.filter(
        course_id=as_course_key(course_id),
        modified__year=day.year,
        modified__month=day.month,
        modified__day=day.day,
    ).values_list('student__id', flat=True).distinct()
Пример #17
0
def seed_course_overviews(data=None):
    """Create or update CourseOverview records from seed data.

    When ``data`` is empty or omitted, ``cans.COURSE_OVERVIEW_DATA`` is used.
    Each record is a mapping with the keys ``id``, ``display_name``, ``org``,
    ``number``, ``created``, ``enrollment_start`` and ``enrollment_end``.
    Records are matched on the course key.
    """

    def utc_datetime(value):
        """Convert a seed value to a datetime tagged as UTC."""
        return as_datetime(value).replace(tzinfo=utc)

    for rec in data or cans.COURSE_OVERVIEW_DATA:
        course_key = as_course_key(rec['id'])
        field_values = dict(
            display_name=rec['display_name'],
            org=rec['org'],
            display_org_with_default=rec['org'],
            number=rec['number'],
            created=utc_datetime(rec['created']),
            enrollment_start=utc_datetime(rec['enrollment_start']),
            enrollment_end=utc_datetime(rec['enrollment_end']),
        )
        CourseOverview.objects.update_or_create(id=course_key,
                                                defaults=field_values)
Пример #18
0
def get_num_learners_completed(course_id, date_for):
    """
    Count the certificates generated for the course up to and including the
    'date_for' date

    We will need to relabel this to "certificates"

    We may want to get the number of certificates granted in the given day
    """
    course_key = as_course_key(course_id)
    # Anything created before the start of the next day counts
    created_before = as_datetime(next_day(date_for))
    return GeneratedCertificate.objects.filter(
        course_id=course_key,
        created_date__lt=created_before,
    ).count()
Пример #19
0
    def setup(self, db):
        """Create one course with enrollments, roles, activity and certs.

        Four enrollments are created for a single course overview. When
        organizations support sites, the course and the enrolled users are
        mapped to an organization owning a new site. One course access role
        is created per entry in ``self.COURSE_ROLES``, StudentModule records
        are created for yesterday and today, and three certificates are
        generated 10, 20 and 30 days after the matching enrollment.
        """
        self.today = datetime.date(2018, 6, 1)
        self.course_overview = CourseOverviewFactory()

        # The enrollment factory is given the course id on Ginkgo and the
        # course overview object otherwise
        if OPENEDX_RELEASE == GINKGO:
            enrollment_kwargs = dict(course_id=self.course_overview.id)
        else:
            enrollment_kwargs = dict(course=self.course_overview)
        self.course_enrollments = [
            CourseEnrollmentFactory(**enrollment_kwargs) for _ in range(4)
        ]

        if organizations_support_sites():
            self.my_site = SiteFactory(domain='my-site.test')
            self.my_site_org = OrganizationFactory(sites=[self.my_site])
            OrganizationCourseFactory(organization=self.my_site_org,
                                      course_id=str(self.course_overview.id))
            for enrollment in self.course_enrollments:
                UserOrganizationMappingFactory(user=enrollment.user,
                                               organization=self.my_site_org)

        # One role per entry in COURSE_ROLES, assigned to enrollments in order
        self.course_access_roles = [
            CourseAccessRoleFactory(
                user=self.course_enrollments[index].user,
                course_id=self.course_enrollments[index].course_id,
                role=role,
            ) for index, role in enumerate(self.COURSE_ROLES)
        ]

        # create student modules for yesterday and today
        # (self.student_modules ends up holding only the last day's records)
        for day in (prev_day(self.today), self.today):
            modified_at = as_datetime(day)
            self.student_modules = [
                StudentModuleFactory(course_id=enrollment.course_id,
                                     student=enrollment.user,
                                     created=enrollment.created,
                                     modified=modified_at)
                for enrollment in self.course_enrollments
            ]

        self.cert_days_to_complete = [10, 20, 30]
        self.expected_avg_cert_days_to_complete = 20
        # Pair the first three enrollments with the days-to-complete values
        self.generated_certificates = [
            GeneratedCertificateFactory(
                user=enrollment.user,
                course_id=enrollment.course_id,
                created_date=(enrollment.created +
                              datetime.timedelta(days=days)),
            ) for enrollment, days in zip(self.course_enrollments,
                                          self.cert_days_to_complete)
        ]
Пример #20
0
def get_days_to_complete(course_id, date_for):
    """Return a dict with a list of days to complete and errors

    For every certificate of the course created on or before
    ``as_datetime(date_for)``, the number of whole days between the learner's
    first matching enrollment and the certificate is collected under
    ``days``. Problems are collected under ``errors`` as dicts with ``msg``,
    ``course_id`` and ``user_id``.

    NOTE: This is a work in progress, as it has issues to resolve:
    * It returns the delta in days, so working in ints
    * This means if a learner starts at midnight and finished just before
      midnight, then 0 days will be given

    NOTE: This has limited scaling. We ought to test it with
    1k, 10k, 100k cert records

    TODO: change to use start_date, end_date with defaults that
    start_date is open and end_date is today

    TODO: Consider collecting the total seconds rather than days
    This will improve accuracy, but may actually not be that important
    TODO: Analyze the error based on number of completions

    When we have to support scale, we can look into optimization
    techniques.
    """
    course_key = as_course_key(course_id)
    certificates = GeneratedCertificate.objects.filter(
        course_id=course_key,
        created_date__lte=as_datetime(date_for))

    def error_record(message, user_id):
        """Build one entry for the errors list."""
        return dict(msg=message, course_id=course_id, user_id=user_id)

    days = []
    errors = []
    for cert in certificates:
        enrollments = CourseEnrollment.objects.filter(course_id=course_key,
                                                      user=cert.user)
        # How do we want to handle multiples?
        # For now they are reported, and the first one is still used below
        if enrollments.count() > 1:
            errors.append(error_record('Multiple CE records', cert.user.id))
        try:
            first_enrollment = enrollments[0]
        except IndexError:
            # sometimes a course enrollment is deleted after the cert is
            # generated. why, who knows?
            # in which case just leave out that data
            errors.append(error_record(
                'No CourseEnrollment matching user course certificate',
                cert.user.id))
        else:
            elapsed = cert.created_date - first_enrollment.created
            days.append(elapsed.days)
    return dict(days=days, errors=errors)
Пример #21
0
def get_active_learner_ids_today(course_id, date_for):
    """Get unique user ids for learners who are active today for the given
    course and date

    A learner is active when one of their StudentModule records for the
    course was modified on ``date_for``. Returns a queryset of distinct
    student ids.

    Note: When Figures no longer has to support Django 1.8, we can simplify
    this date check:
        https://docs.djangoproject.com/en/1.9/ref/models/querysets/#date
    """
    day = as_datetime(date_for)
    modified_on_day = dict(
        modified__year=day.year,
        modified__month=day.month,
        modified__day=day.day,
    )
    todays_modules = StudentModule.objects.filter(
        course_id=as_course_key(course_id), **modified_on_day)
    return todays_modules.values_list('student__id', flat=True).distinct()
Пример #22
0
 def test_get_now_from_date(self):
     '''
     as_datetime on a date returns that date at midnight, UTC
     '''
     a_date = self.now.date()
     midnight = datetime.datetime.combine(a_date, datetime.time.min)
     expected = midnight.replace(tzinfo=utc)
     assert as_datetime(a_date) == expected
Пример #23
0
def seed_course_enrollments_for_course(course_id, users, max_days_back):
    """Enroll each user in the course with a random enrollment date.

    The enrollment date falls between 1 and ``abs(max_days_back)`` days
    before ``LAST_DAY``.
    """

    def random_enroll_date(limit):
        """Pick a date 1 to ``abs(limit)`` days before LAST_DAY."""
        offset = random.randint(1, abs(limit))
        return days_from(LAST_DAY, -offset)

    for user in users:
        if VERBOSE:
            print('seeding course enrollment for user {}'.format(user.username))

        enrolled_on = as_datetime(random_enroll_date(max_days_back))
        CourseEnrollment.objects.update_or_create(
            course_id=course_id,
            user=user,
            created=enrolled_on.replace(tzinfo=utc),
        )
    def test_get_active_user_count_for_date(self, monkeypatch):
        """Every user with activity on date_for is returned as active.

        ``get_student_modules_for_site`` is patched with a mock that creates
        two users with two StudentModule records each, all modified on
        ``self.date_for``.
        """
        assert not get_user_model().objects.count()
        assert not StudentModule.objects.count()
        modified = as_datetime(self.date_for)

        def mock_student_modules_for_site(site):
            new_users = [UserFactory() for _ in range(2)]
            for new_user in new_users:
                for _ in range(2):
                    StudentModuleFactory(student=new_user, modified=modified)
            return StudentModule.objects.all()

        monkeypatch.setattr(pipeline_sdm, 'get_student_modules_for_site',
                            mock_student_modules_for_site)

        active_users = pipeline_sdm.get_site_active_users_for_date(
            site=self.site, date_for=self.date_for)
        assert active_users.count() == get_user_model().objects.count()
Пример #25
0
    def load(self, site, date_for=None, force_update=False, **_kwargs):
        '''
        Get or build the SiteDailyMetrics record for ``site`` and ``date_for``.

        When ``date_for`` is not given, yesterday (UTC) is used. If a record
        already exists and ``force_update`` is False, it is returned without
        extracting data. Returns a ``(record, created)`` pair.

        Architectural note:
        Initially, we're going to be explicit, requiring callers to specify the
        site model instance to be associated with the site specific metrics
        model(s) we are populating

        TODOs:
        Add filtering for
        * Multi-tenancy
        * Course access groups
        '''
        if date_for:
            date_for = as_datetime(date_for).replace(tzinfo=utc)
        else:
            utc_today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
            date_for = prev_day(utc_today)

        # if we already have a record for the date_for and force_update is False
        # then skip getting data
        if not force_update:
            try:
                existing = SiteDailyMetrics.objects.get(site=site,
                                                        date_for=date_for)
            except SiteDailyMetrics.DoesNotExist:
                # proceed normally
                pass
            else:
                return (existing, False)

        data = self.extractor.extract(site=site, date_for=date_for)
        metric_fields = (
            'cumulative_active_user_count',
            'todays_active_user_count',
            'total_user_count',
            'course_count',
            'total_enrollment_count',
            'mau',
        )
        site_metrics, created = SiteDailyMetrics.objects.update_or_create(
            date_for=date_for,
            site=site,
            defaults={field: data[field] for field in metric_fields})
        return site_metrics, created
Пример #26
0
def test_mau_1g_for_month_as_of_day_first_day_next_month(db):
    """
    Test getting live MAU 1G values from StudentModule for the given day

    Quick-n-dirty data setup:

    We want to make sure we get the right records when the query happens on the
    first day of the next month. So we do the following

    * Add StudentModule records for the month before we want to capture records
    * Add StudentModule records for the month we want to capture records
    * Add StudentModule records for the month after we want to capture records

    This sets up the scenario that we run the daily pipeline to capture MAU
    "as of" yesterday (the last day of the previous month) to capture MAU for
    the previous month and not capture any records before the previous month,
    nor capture records for the "current month"
    """
    month_before = [as_datetime('2020-02-02'), as_datetime('2020-02-29')]
    month_after = [as_datetime('2020-04-01'), as_datetime('2020-04-01 12:00')]
    in_month = [
        as_datetime('2020-03-01'),
        as_datetime('2020-03-15'),
        as_datetime('2020-03-31'),
        as_datetime('2020-03-31 12:00')
    ]
    date_for = as_date('2020-03-31')

    # Create student modules for the month before, month after, and in the
    # month for which we want to retrieve records
    for outside_dt in month_before + month_after:
        StudentModuleFactory(created=outside_dt, modified=outside_dt)
    expected_user_ids = set()
    for inside_dt in in_month:
        student_module = StudentModuleFactory(created=inside_dt,
                                              modified=inside_dt)
        expected_user_ids.add(student_module.student_id)

    user_ids = mau_1g_for_month_as_of_day(
        sm_queryset=StudentModule.objects.all(), date_for=date_for)

    assert len(user_ids) == len(in_month)
    found_user_ids = {rec['student__id'] for rec in user_ids}
    assert found_user_ids == expected_user_ids
Пример #27
0
def seed_course_completions():
    """
    Generate certificates for a random quarter of each course's enrollments.

    Each certificate's ``created_date`` is a fake date between the
    enrollment's ``created`` value and ``LAST_DAY``.
    """
    for course_overview in CourseOverview.objects.all():
        enrollments = CourseEnrollment.objects.filter(
            course_id=course_overview.id)

        # we just want a few of the enrollments to have completed
        # first cut, have 25% of learners complete course
        completed_count = int(enrollments.count() * 0.25)

        # Note there is a performance hit for using '?'
        completers = enrollments.order_by('?')[:completed_count]
        for enrollment in completers:
            GeneratedCertificate.objects.create(
                user=enrollment.user,
                course_id=course_overview.id,
                created_date=as_datetime(
                    FAKE.date_between(enrollment.created,
                                      LAST_DAY)).replace(tzinfo=utc),
            )
Пример #28
0
def seed_student_modules():
    """
    Create between one and five StudentModule records per course enrollment.

    Each record's ``modified`` value is a fake date between the enrollment's
    ``created`` value and ``LAST_DAY``.

    We're assuming active students here. Improvement is to skip a few and make
    others more active. Do it in a normal distribution
    """
    for enrollment in CourseEnrollment.objects.all():
        # Added assert to check if it traps for this error:
        # ValueError: empty range for randrange() (1554681600,1554595201, -86399)
        assert enrollment.created <= LAST_DAY, "ce.created={}, LAST_DAY={}".format(
            enrollment.created, LAST_DAY)

        record_count = random.randint(1, 5)
        for _ in range(record_count):
            modified_on = FAKE.date_between(enrollment.created, LAST_DAY)
            StudentModule.objects.update_or_create(
                student=enrollment.user,
                course_id=enrollment.course_id,
                created=enrollment.created,
                modified=as_datetime(modified_on),
            )
Пример #29
0
def get_num_enrolled_in_exclude_admins(course_id, date_for):
    """
    Count the active enrollments for the course, created before the day after
    ``date_for``, leaving out course staff, instructors and CCX coaches.

    Copied over from CourseEnrollmentManager.num_enrolled_in_exclude_admins method
    and modified to filter on date LT
    """
    # Roles are looked up on the course locator when the key has a 'ccx' value
    if getattr(course_id, 'ccx', None):
        course_locator = course_id.to_course_locator()
    else:
        course_locator = course_id

    excluded_user_sets = (
        CourseStaffRole(course_locator).users_with_role(),
        CourseInstructorRole(course_locator).users_with_role(),
        CourseCcxCoachRole(course_locator).users_with_role(),
    )

    enrollments = CourseEnrollment.objects.filter(
        course_id=course_id,
        is_active=1,
        created__lt=as_datetime(next_day(date_for)),
    )
    for role_users in excluded_user_sets:
        enrollments = enrollments.exclude(user__in=role_users)
    return enrollments.count()
Пример #30
0
def missing_course_daily_metrics(site, date_for):
    '''
    Return the set of course ids for the site's courses that have no
    CourseDailyMetrics record for the given site and date

    Only courses created before the day after ``date_for`` are considered.

    The type returned is CourseLocator

    We use this to make sure that we are not missing course data when we
    populate the SiteDailyMetrics instance for the given date

    '''
    existing_metrics = CourseDailyMetrics.objects.filter(site=site,
                                                         date_for=date_for)
    covered_course_keys = [
        as_course_key(metrics.course_id) for metrics in existing_metrics
    ]

    created_before = as_datetime(next_day(date_for))
    candidate_courses = figures.sites.get_courses_for_site(site).filter(
        created__lt=created_before)
    uncovered_courses = candidate_courses.exclude(id__in=covered_course_keys)

    return set(uncovered_courses.values_list('id', flat=True))