Exemplo n.º 1
0
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.
    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways (empty when no ids are cached for the site).
        dict, if a specific pathway is requested; the cache's own miss value is
        returned unchanged when that pathway is not cached.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(
        SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    # Materialize the values as a real list. On Python 3 dict.values() is a
    # view, which cannot be extended in place and never compares equal to a
    # list, so callers expecting the documented list would break.
    pathways = list(cache.get_many([
        PATHWAY_CACHE_KEY_TPL.format(id=cached_id)
        for cached_id in pathway_ids
    ]).values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(
                count=len(missing_ids)))

        retried_pathways = cache.get_many([
            PATHWAY_CACHE_KEY_TPL.format(id=missing_id)
            for missing_id in missing_ids
        ])
        pathways.extend(retried_pathways.values())

        # Anything absent after the single retry is reported individually.
        still_missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
Exemplo n.º 2
0
    def test_get_many(self, mock_warning, mock_info):
        """
        Exercise get_pathways through three cache states: no id list cached,
        id list cached with one pathway's details missing, and everything cached.
        """
        def keyed(items):
            # Map each pathway's cache key to the pathway dict itself.
            return {PATHWAY_CACHE_KEY_TPL.format(id=item['id']): item for item in items}

        def ids_of(items):
            return {item['id'] for item in items}

        def check_details(returned, expected_by_key):
            # Dicts aren't hashable, so the payloads are compared one by one
            # instead of through a set comparison.
            for item in returned:
                assert item == expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])]

        created = PathwayFactory.create_batch(3)

        # Only the first two pathways get their details cached at this point.
        partial_pathways = keyed(created[:2])
        cache.set_many(partial_pathways, None)

        # Without a cached id list the function returns an empty list and
        # logs a warning.
        assert get_pathways(self.site) == []
        mock_warning.assert_called_once_with(
            'Failed to get credit pathway ids from the cache.')
        mock_warning.reset_mock()

        # Now cache the ids of all three pathways for the site.
        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
        cache.set(site_ids_key, [item['id'] for item in created], None)

        actual_pathways = get_pathways(self.site)

        # Two pathways come back; the third triggers the retry info message
        # followed by a warning naming the missing pathway.
        assert ids_of(actual_pathways) == ids_of(partial_pathways.values())
        mock_info.assert_called_with(
            'Failed to get details for 1 pathways. Retrying.')
        mock_warning.assert_called_with(
            'Failed to get details for credit pathway {id} from the cache.'.
            format(id=created[2]['id']))
        mock_warning.reset_mock()

        check_details(actual_pathways, partial_pathways)

        # Cache details for all 3 pathways.
        all_pathways = keyed(created)
        cache.set_many(all_pathways, None)

        actual_pathways = get_pathways(self.site)

        # Every pathway is returned and nothing is logged at warning level.
        assert ids_of(actual_pathways) == ids_of(all_pathways.values())
        assert not mock_warning.called

        check_details(actual_pathways, all_pathways)
Exemplo n.º 3
0
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.
    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways (empty when no ids are cached for the site).
        dict, if a specific pathway is requested; the cache's own miss value is
        returned unchanged when that pathway is not cached.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    # Materialize the values as a real list. On Python 3 dict.values() is a
    # view, which cannot be extended in place and never compares equal to a
    # list, so callers expecting the documented list would break.
    pathways = list(
        cache.get_many([PATHWAY_CACHE_KEY_TPL.format(id=cached_id) for cached_id in pathway_ids]).values()
    )

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(count=len(missing_ids))
        )

        retried_pathways = cache.get_many(
            [PATHWAY_CACHE_KEY_TPL.format(id=missing_id) for missing_id in missing_ids]
        )
        pathways.extend(retried_pathways.values())

        # Anything absent after the single retry is reported individually.
        still_missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
Exemplo n.º 4
0
    def test_pathways_multiple_pages(self):
        """
        Verify that cache_programs caches every credit pathway when the
        pathways endpoint spreads its results across several pages.
        """
        UserFactory(username=self.catalog_integration.service_username)

        # Extend the fixture pathways with 40 extra ones that reference no programs.
        extra_pathways = PathwayFactory.create_batch(40)
        for extra in extra_pathways:
            extra['programs'] = []
        all_pathways = self.pathways + extra_pathways

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        self.mock_list()
        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        # Three pages of credit pathways are mocked, registered last page first.
        self.mock_pathways(all_pathways[40:], page_number=3, final=True)
        self.mock_pathways(all_pathways[20:40], page_number=2, final=False)
        self.mock_pathways(all_pathways[:20], page_number=1, final=False)

        call_command('cache_programs')

        expected_by_key = {}
        for pathway in all_pathways:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=pathway['id'])] = pathway
        expected_keys = list(expected_by_key)

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_keys = cache.get(site_ids_key)
        self.assertEqual(set(cached_keys), set(expected_keys))

        cached_details = cache.get_many(expected_keys)
        self.assertEqual(set(cached_details), set(expected_by_key))

        # The key sets already match, so only the payloads remain to be
        # checked. Dicts aren't hashable, hence the per-entry comparison.
        for key, cached in cached_details.items():
            expected = expected_by_key[key]
            # The cache keeps program uuids rather than full program dicts;
            # reshape the expected pathway the same way before comparing.
            expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
    def test_pathways_multiple_pages(self):
        """
        Verify that cache_programs caches every credit pathway when the
        pathways endpoint spreads its results across several pages.
        """
        UserFactory(username=self.catalog_integration.service_username)

        # Extend the fixture pathways with 40 extra ones that reference no programs.
        extra_pathways = PathwayFactory.create_batch(40)
        for extra in extra_pathways:
            extra['programs'] = []
        all_pathways = self.pathways + extra_pathways

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        self.mock_list()
        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        # Three pages of credit pathways are mocked, registered last page first.
        self.mock_pathways(all_pathways[40:], page_number=3, final=True)
        self.mock_pathways(all_pathways[20:40], page_number=2, final=False)
        self.mock_pathways(all_pathways[:20], page_number=1, final=False)

        call_command('cache_programs')

        expected_by_key = {}
        for pathway in all_pathways:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=pathway['id'])] = pathway
        expected_keys = list(expected_by_key)

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_keys = cache.get(site_ids_key)
        self.assertEqual(set(cached_keys), set(expected_keys))

        cached_details = cache.get_many(expected_keys)
        self.assertEqual(set(cached_details), set(expected_by_key))

        # The key sets already match, so only the payloads remain to be
        # checked. Dicts aren't hashable, hence the per-entry comparison.
        for key, cached in cached_details.items():
            expected = expected_by_key[key]
            # The cache keeps program uuids rather than full program dicts;
            # reshape the expected pathway the same way before comparing.
            expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
Exemplo n.º 6
0
    def test_handle_pathways(self):
        """
        Verify that cache_programs requests credit pathways and stores both
        the per-site id list and the per-pathway details in the cache.
        """
        UserFactory(username=self.catalog_integration.service_username)

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        expected_by_key = {}
        for pathway in self.pathways:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=pathway['id'])] = pathway

        self.mock_list()
        self.mock_pathways(self.pathways)

        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        call_command('cache_programs')

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_keys = cache.get(site_ids_key)
        expected_keys = list(expected_by_key)
        self.assertEqual(set(cached_keys), set(expected_keys))

        cached_details = cache.get_many(expected_keys)
        self.assertEqual(set(cached_details), set(expected_by_key))

        # The key sets already match, so only the payloads remain to be
        # checked. Dicts aren't hashable, hence the per-entry comparison.
        for key, cached in cached_details.items():
            expected = expected_by_key[key]
            # The cache keeps program uuids rather than full program dicts;
            # reshape the expected pathway the same way before comparing.
            expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
    def test_handle_pathways(self):
        """
        Verify that cache_programs requests credit pathways and stores both
        the per-site id list and the per-pathway details in the cache.
        """
        UserFactory(username=self.catalog_integration.service_username)

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        expected_by_key = {}
        for pathway in self.pathways:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=pathway['id'])] = pathway

        self.mock_list()
        self.mock_pathways(self.pathways)

        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        call_command('cache_programs')

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_keys = cache.get(site_ids_key)
        expected_keys = list(expected_by_key)
        self.assertEqual(set(cached_keys), set(expected_keys))

        cached_details = cache.get_many(expected_keys)
        self.assertEqual(set(cached_details), set(expected_by_key))

        # The key sets already match, so only the payloads remain to be
        # checked. Dicts aren't hashable, hence the per-entry comparison.
        for key, cached in cached_details.items():
            expected = expected_by_key[key]
            # The cache keeps program uuids rather than full program dicts;
            # reshape the expected pathway the same way before comparing.
            expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
Exemplo n.º 8
0
    def test_handle_missing_pathways(self):
        """
        Verify that the command exits with status 1 when pathways cannot be
        retrieved, and that the site's cached pathway id list is empty.
        """
        UserFactory(username=self.catalog_integration.service_username)

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        # Only the program list and program details are mocked here; no
        # pathways response is registered.
        self.mock_list()
        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        with self.assertRaises(SystemExit) as context:
            call_command('cache_programs')
        self.assertEqual(context.exception.code, 1)

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        self.assertEqual(cache.get(site_ids_key), [])
    def test_handle_missing_pathways(self):
        """
        Verify that the command exits with status 1 when pathways cannot be
        retrieved, and that the site's cached pathway id list is empty.
        """
        UserFactory(username=self.catalog_integration.service_username)

        programs_by_key = {}
        for program in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

        # Only the program list and program details are mocked here; no
        # pathways response is registered.
        self.mock_list()
        for uuid in self.uuids:
            self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

        with self.assertRaises(SystemExit) as context:
            call_command('cache_programs')
        self.assertEqual(context.exception.code, 1)

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        self.assertEqual(cache.get(site_ids_key), [])
Exemplo n.º 10
0
    def handle(self, *args, **options):
        """
        Fetch programs and credit pathways for every site and write them to the cache.

        Sites without a COURSE_CATALOG_API_URL configuration value get empty
        program-uuid and pathway-id lists cached and are otherwise skipped.
        Details for all programs and pathways are written after every site
        has been processed.

        Raises:
            User.DoesNotExist: if the catalog integration's service user is missing.
            SystemExit: with code 1 if any fetch or processing step reported a failure.
        """
        failure = False
        logger.info('populate-multitenant-programs switch is ON')

        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            logger.error(
                'Failed to create API client. Service user {username} does not exist.'.format(username=username)
            )
            raise

        programs = {}
        pathways = {}
        for site in Site.objects.all():
            site_config = getattr(site, 'configuration', None)
            if site_config is None or not site_config.get_value('COURSE_CATALOG_API_URL'):
                logger.info('Skipping site {domain}. No configuration.'.format(domain=site.domain))
                cache.set(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [], None)
                cache.set(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [], None)
                continue

            client = create_catalog_api_client(user, site=site)
            uuids, program_uuids_failed = self.get_site_program_uuids(client, site)
            new_programs, program_details_failed = self.fetch_program_details(client, uuids)
            new_pathways, pathways_failed = self.get_pathways(client, site)
            new_pathways, new_programs, pathway_processing_failed = self.process_pathways(site, new_pathways,
                                                                                          new_programs)

            if program_uuids_failed or program_details_failed or pathways_failed or pathway_processing_failed:
                failure = True

            programs.update(new_programs)
            pathways.update(new_pathways)

            logger.info('Caching UUIDs for {total} programs for site {site_name}.'.format(
                total=len(uuids),
                site_name=site.domain,
            ))
            cache.set(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), uuids, None)

            # Store a concrete list rather than a dict view: on Python 3
            # dict.keys() returns a view object that cannot be pickled, so
            # it is not a safe value to hand to the cache.
            pathway_ids = list(new_pathways)
            logger.info('Caching ids for {total} credit pathways for site {site_name}.'.format(
                total=len(pathway_ids),
                site_name=site.domain,
            ))
            cache.set(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), pathway_ids, None)

        successful_programs = len(programs)
        logger.info('Caching details for {successful_programs} programs.'.format(
            successful_programs=successful_programs))
        cache.set_many(programs, None)

        successful_pathways = len(pathways)
        logger.info('Caching details for {successful_pathways} credit pathways.'.format(
            successful_pathways=successful_pathways))
        cache.set_many(pathways, None)

        if failure:
            # This will fail a Jenkins job running this command, letting site
            # operators know that there was a problem.
            sys.exit(1)
Exemplo n.º 11
0
    def handle(self, *args, **options):  # lint-amnesty, pylint: disable=too-many-statements
        """
        Fetch catalog data for every site and write it to the cache.

        Sites without a COURSE_CATALOG_API_URL configuration value get empty
        program-uuid and pathway-id lists cached and are otherwise skipped.
        For configured sites the per-site program uuids and pathway ids are
        cached inside the loop; program and pathway details plus the course,
        catalog-course, program-type, program-type-slug and organization
        mappings are written once all sites have been processed.

        Raises:
            User.DoesNotExist: if the catalog integration's service user is missing.
            SystemExit: with code 1 if any site reported a fetch or processing failure.
        """
        failure = False
        logger.info('populate-multitenant-programs switch is ON')

        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            logger.exception(
                f'Failed to create API client. Service user {username} does not exist.'
            )
            raise

        programs = {}
        pathways = {}
        courses = {}
        catalog_courses = {}
        programs_by_type = {}
        programs_by_type_slug = {}
        organizations = {}
        for site in Site.objects.all():
            site_config = getattr(site, 'configuration', None)
            if site_config is None or not site_config.get_value(
                    'COURSE_CATALOG_API_URL'):
                logger.info(f'Skipping site {site.domain}. No configuration.')
                cache.set(
                    SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(
                        domain=site.domain), [], None)
                cache.set(
                    SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain),
                    [], None)
                continue

            client = create_catalog_api_client(user, site=site)
            uuids, program_uuids_failed = self.get_site_program_uuids(
                client, site)
            new_programs, program_details_failed = self.fetch_program_details(
                client, uuids)
            new_pathways, pathways_failed = self.get_pathways(client, site)
            new_pathways, new_programs, pathway_processing_failed = self.process_pathways(
                site, new_pathways, new_programs)

            # Accumulate instead of reassigning: a failure on an earlier site
            # must not be erased by a later site that processes cleanly.
            if any((
                program_uuids_failed,
                program_details_failed,
                pathways_failed,
                pathway_processing_failed,
            )):
                failure = True

            programs.update(new_programs)
            pathways.update(new_pathways)
            courses.update(self.get_courses(new_programs))
            catalog_courses.update(self.get_catalog_courses(new_programs))
            programs_by_type.update(
                self.get_programs_by_type(site, new_programs))
            programs_by_type_slug.update(
                self.get_programs_by_type_slug(site, new_programs))
            organizations.update(
                self.get_programs_by_organization(new_programs))

            logger.info(
                'Caching UUIDs for {total} programs for site {site_name}.'.
                format(
                    total=len(uuids),
                    site_name=site.domain,
                ))
            cache.set(
                SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain),
                uuids, None)

            pathway_ids = list(new_pathways.keys())
            logger.info(
                'Caching ids for {total} pathways for site {site_name}.'.
                format(
                    total=len(pathway_ids),
                    site_name=site.domain,
                ))
            cache.set(
                SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain),
                pathway_ids, None)

        logger.info(f'Caching details for {len(programs)} programs.')
        cache.set_many(programs, None)

        logger.info(f'Caching details for {len(pathways)} pathways.')
        cache.set_many(pathways, None)

        logger.info(f'Caching programs uuids for {len(courses)} courses.')
        cache.set_many(courses, None)

        logger.info(
            f'Caching programs uuids for {len(catalog_courses)} catalog courses.'
        )
        cache.set_many(catalog_courses, None)

        logger.info(
            f'Caching program UUIDs by {len(programs_by_type)} program types.'
        )
        cache.set_many(programs_by_type, None)

        logger.info(
            f'Caching program UUIDs by {len(programs_by_type_slug)} program type slugs.'
        )
        cache.set_many(programs_by_type_slug, None)

        logger.info(
            f'Caching programs uuids for {len(organizations)} organizations')
        cache.set_many(organizations, None)

        if failure:
            # A non-zero exit status signals to whatever runs the command
            # that at least one site could not be fully cached.
            sys.exit(1)
Exemplo n.º 12
0
    def test_get_many(self, mock_warning, mock_info):
        """
        Exercise get_pathways through three cache states: no id list cached,
        id list cached with one pathway's details missing, and everything cached.
        """
        def keyed(items):
            # Map each pathway's cache key to the pathway dict itself.
            return {PATHWAY_CACHE_KEY_TPL.format(id=item['id']): item for item in items}

        def ids_of(items):
            return {item['id'] for item in items}

        def check_details(returned, expected_by_key):
            # Dicts aren't hashable, so the payloads are compared one by one
            # instead of through a set comparison.
            for item in returned:
                self.assertEqual(item, expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])])

        created = PathwayFactory.create_batch(3)

        # Only the first two pathways get their details cached at this point.
        partial_pathways = keyed(created[:2])
        cache.set_many(partial_pathways, None)

        # Without a cached id list the function returns an empty list and
        # logs a warning.
        self.assertEqual(get_pathways(self.site), [])
        mock_warning.assert_called_once_with('Failed to get credit pathway ids from the cache.')
        mock_warning.reset_mock()

        # Now cache the ids of all three pathways for the site.
        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
        cache.set(site_ids_key, [item['id'] for item in created], None)

        actual_pathways = get_pathways(self.site)

        # Two pathways come back; the third triggers the retry info message
        # followed by a warning naming the missing pathway.
        self.assertEqual(ids_of(actual_pathways), ids_of(partial_pathways.values()))
        mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')
        mock_warning.assert_called_with(
            'Failed to get details for credit pathway {id} from the cache.'.format(id=created[2]['id'])
        )
        mock_warning.reset_mock()

        check_details(actual_pathways, partial_pathways)

        # Cache details for all 3 pathways.
        all_pathways = keyed(created)
        cache.set_many(all_pathways, None)

        actual_pathways = get_pathways(self.site)

        # Every pathway is returned and nothing is logged at warning level.
        self.assertEqual(ids_of(actual_pathways), ids_of(all_pathways.values()))
        self.assertFalse(mock_warning.called)

        check_details(actual_pathways, all_pathways)
Exemplo n.º 13
0
    def handle(self, *args, **options):
        """
        Fetch programs, pathways and course mappings for every site and cache them.

        Sites without a COURSE_CATALOG_API_URL configuration value get empty
        program-uuid and pathway-id lists cached and are otherwise skipped.
        For configured sites the per-site program uuids and pathway ids are
        cached inside the loop; program, pathway and course data are written
        once all sites have been processed.

        Raises:
            User.DoesNotExist: if the catalog integration's service user is missing.
            SystemExit: with code 1 if any site reported a fetch or processing failure.
        """
        failure = False
        logger.info('populate-multitenant-programs switch is ON')

        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            logger.exception(
                u'Failed to create API client. Service user {username} does not exist.'
                .format(username=username))
            raise

        programs = {}
        pathways = {}
        courses = {}
        for site in Site.objects.all():
            site_config = getattr(site, 'configuration', None)
            if site_config is None or not site_config.get_value(
                    'COURSE_CATALOG_API_URL'):
                logger.info(
                    u'Skipping site {domain}. No configuration.'.format(
                        domain=site.domain))
                cache.set(
                    SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(
                        domain=site.domain), [], None)
                cache.set(
                    SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain),
                    [], None)
                continue

            client = create_catalog_api_client(user, site=site)
            uuids, program_uuids_failed = self.get_site_program_uuids(
                client, site)
            new_programs, program_details_failed = self.fetch_program_details(
                client, uuids)
            new_pathways, pathways_failed = self.get_pathways(client, site)
            new_pathways, new_programs, pathway_processing_failed = self.process_pathways(
                site, new_pathways, new_programs)
            new_courses, courses_failed = self.get_courses(new_programs)

            # Accumulate instead of reassigning: a failure on an earlier site
            # must not be erased by a later site that processes cleanly.
            if any((
                program_uuids_failed,
                program_details_failed,
                pathways_failed,
                pathway_processing_failed,
                courses_failed,
            )):
                failure = True

            programs.update(new_programs)
            pathways.update(new_pathways)
            courses.update(new_courses)

            logger.info(
                u'Caching UUIDs for {total} programs for site {site_name}.'.
                format(
                    total=len(uuids),
                    site_name=site.domain,
                ))
            cache.set(
                SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain),
                uuids, None)

            pathway_ids = list(new_pathways.keys())
            logger.info(
                u'Caching ids for {total} pathways for site {site_name}.'.
                format(
                    total=len(pathway_ids),
                    site_name=site.domain,
                ))
            cache.set(
                SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain),
                pathway_ids, None)

        successful_programs = len(programs)
        logger.info(
            u'Caching details for {successful_programs} programs.'.format(
                successful_programs=successful_programs))
        cache.set_many(programs, None)

        successful_pathways = len(pathways)
        logger.info(
            u'Caching details for {successful_pathways} pathways.'.format(
                successful_pathways=successful_pathways))
        cache.set_many(pathways, None)

        successful_courses = len(courses)
        logger.info(
            u'Caching programs uuids for {successful_courses} courses.'.format(
                successful_courses=successful_courses))
        cache.set_many(courses, None)

        if failure:
            # This will fail a Jenkins job running this command, letting site
            # operators know that there was a problem.
            sys.exit(1)