def get_programs_by_uuids(uuids):
    """
    Read the cached details of every program named in ``uuids``.

    Arguments:
        uuids (iterable): program identifiers. Each one is converted to text
            before use, so UUID objects are as acceptable as strings.

    Returns:
        list of dict: the program details that could be read from the cache.
        Programs still missing after one retry are left out of the list and
        reported with a warning.
    """
    wanted = [six.text_type(item) for item in uuids]
    wanted_set = set(wanted)

    first_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in wanted])
    programs = list(first_pass.values())

    # get_many occasionally fails to return details held on one or more
    # Memcached nodes. The keys do not appear to be evicted: roughly 1% of the
    # time every key stored on the affected node(s) is absent from the result,
    # yet a get_many issued immediately afterwards returns all of them. Asking
    # again for just the missing keys mitigates this. Splitting the get_many
    # into smaller chunks may also help.
    missing_uuids = wanted_set - {program['uuid'] for program in programs}
    if not missing_uuids:
        return programs

    logger.info(
        u'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
    )
    second_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in missing_uuids])
    programs.extend(second_pass.values())

    # Anything absent after the retry is reported individually.
    for lost in wanted_set - {program['uuid'] for program in programs}:
        logger.warning(missing_details_msg_tpl.format(uuid=lost))

    return programs
def test_get_many_with_missing(self, mock_cache, mock_warning, mock_info):
    """
    Verify that programs missing from the first get_many are recovered by the
    retry, with an info message logged and no warning.
    """
    programs = ProgramFactory.create_batch(3)
    last_program = programs[-1]

    all_programs = {}
    for program in programs:
        all_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

    partial_programs = {}
    for program in programs[:2]:
        partial_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

    def fake_get_many(keys):
        # Requests for anything other than a single key only see the first two
        # programs; a single-key request yields the third one.
        if len(keys) != 1:
            return partial_programs
        return {PROGRAM_CACHE_KEY_TPL.format(uuid=last_program['uuid']): last_program}

    mock_cache.get.return_value = [program['uuid'] for program in programs]
    mock_cache.get_many.side_effect = fake_get_many

    actual_programs = get_programs(self.site)

    # All 3 cached programs should be returned. An info message should be
    # logged about the one that was initially missing, but the code should
    # be able to stitch together all the details.
    actual_uuids = set(program['uuid'] for program in actual_programs)
    expected_uuids = set(program['uuid'] for program in all_programs.values())
    self.assertEqual(actual_uuids, expected_uuids)
    self.assertFalse(mock_warning.called)
    mock_info.assert_called_with('Failed to get details for 1 programs. Retrying.')

    for program in actual_programs:
        expected = all_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])]
        self.assertEqual(program, expected)
def test_pathways_multiple_pages(self):
    """
    Verify that the command properly caches credit pathways when multiple pages
    are returned from its endpoint
    """
    UserFactory(username=self.catalog_integration.service_username)

    extra_pathways = PathwayFactory.create_batch(40)
    for extra in extra_pathways:
        extra['programs'] = []
    pathways = self.pathways + extra_pathways

    programs_by_key = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
        for program in self.programs
    }

    self.mock_list()
    for uuid in self.uuids:
        self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    # mock 3 pages of credit pathways, starting at the last
    page_specs = (
        (3, pathways[40:], True),
        (2, pathways[20:40], False),
        (1, pathways[:20], False),
    )
    for page_number, page, is_final in page_specs:
        self.mock_pathways(page, page_number=page_number, final=is_final)

    call_command('cache_programs')

    pathways_dict = {
        PATHWAY_CACHE_KEY_TPL.format(id=pathway['id']): pathway
        for pathway in pathways
    }
    pathway_keys = list(pathways_dict)

    cached_pathway_keys = cache.get(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain))
    self.assertEqual(set(cached_pathway_keys), set(pathway_keys))

    cached_pathways = cache.get_many(pathway_keys)
    self.assertEqual(set(cached_pathways), set(pathways_dict))

    # The values are dictionaries and therefore unhashable, so a set
    # comparison is not possible. Every pathway is already known to have come
    # out of the cache, so only the data itself is left to verify.
    for key, cached_pathway in cached_pathways.items():
        expected = pathways_dict[key]
        # cached pathways store just program uuids instead of the full programs, transform before comparing
        expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
        del expected['programs']
        self.assertEqual(cached_pathway, expected)
def get_programs(site, uuid=None):
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        uuid (string): UUID identifying a specific program to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.
    """
    missing_details_msg_tpl = 'Failed to get details for program {uuid} from the cache.'

    # Single-program lookup: return whatever the cache holds, warning on a miss.
    if uuid:
        single = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not single:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return single

    uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not uuids:
        logger.warning('Failed to get program UUIDs from the cache.')
    wanted = set(uuids)

    first_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in uuids])
    programs = list(first_pass.values())

    # get_many occasionally fails to return details held on one or more
    # Memcached nodes. The keys do not appear to be evicted: roughly 1% of the
    # time every key stored on the affected node(s) is absent from the result,
    # yet a get_many issued immediately afterwards returns all of them. Asking
    # again for just the missing keys mitigates this. Splitting the get_many
    # into smaller chunks may also help.
    missing_uuids = wanted - {program['uuid'] for program in programs}
    if not missing_uuids:
        return programs

    logger.info(
        'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
    )
    second_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in missing_uuids])
    programs.extend(second_pass.values())

    # Anything absent after the retry is reported individually.
    for lost in wanted - {program['uuid'] for program in programs}:
        logger.warning(missing_details_msg_tpl.format(uuid=lost))

    return programs
def process_pathways(self, site, pathways, programs):
    """
    For each program, add references to each pathway it is a part of.
    For each pathway, replace the "programs" dict with "program_uuids",
    which only contains uuids (since program data is already cached)

    Arguments:
        site: object whose ``domain`` is named in the log message on failure.
        pathways (iterable of dict): each needs an ``id`` and a ``programs``
            list of dicts carrying a ``uuid``. The dicts are modified in place.
        programs (dict): maps program cache keys to program dicts that already
            have a ``pathway_ids`` list. The lists are appended to in place.

    Returns:
        tuple: ``(processed_pathways, programs, failure)`` where
        ``processed_pathways`` maps pathway cache keys to pathway dicts and
        ``failure`` is True if any pathway raised while being processed.
    """
    processed_pathways = {}
    failure = False
    for pathway in pathways:
        try:
            pathway_id = pathway['id']
            pathway_cache_key = PATHWAY_CACHE_KEY_TPL.format(id=pathway_id)
            # Registered before the programs are walked, so a pathway that
            # fails part-way through is still present in the result.
            processed_pathways[pathway_cache_key] = pathway
            uuids = []

            for program in pathway['programs']:
                program_uuid = program['uuid']
                program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)
                programs[program_cache_key]['pathway_ids'].append(pathway_id)
                uuids.append(program_uuid)

            del pathway['programs']
            pathway['program_uuids'] = uuids
        # Only ordinary errors are treated as a processing failure; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to process pathways for {domain}'.format(domain=site.domain))
            failure = True
    return processed_pathways, programs, failure
def _create_catalog_program(self, catalog_org):
    """
    Helper that builds a program authored by ``catalog_org``, stores it in the
    cache under its program key and returns it.
    """
    catalog_program = ProgramFactory.create(authoring_organizations=[catalog_org])
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=catalog_program['uuid'])
    cache.set(cache_key, catalog_program, None)
    return catalog_program
def test_handle_pathways(self):
    """
    Verify that the command requests and caches credit pathways
    """
    UserFactory(username=self.catalog_integration.service_username)

    programs_by_key = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
        for program in self.programs
    }
    expected_pathways = {
        PATHWAY_CACHE_KEY_TPL.format(id=pathway['id']): pathway
        for pathway in self.pathways
    }

    self.mock_list()
    self.mock_pathways(self.pathways)
    for uuid in self.uuids:
        self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    cached_pathway_keys = cache.get(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain))
    pathway_keys = list(expected_pathways)
    self.assertEqual(set(cached_pathway_keys), set(pathway_keys))

    cached_pathways = cache.get_many(pathway_keys)
    self.assertEqual(set(cached_pathways), set(expected_pathways))

    # The values are dictionaries and therefore unhashable, so a set
    # comparison is not possible. Every pathway is already known to have come
    # out of the cache, so only the data itself is left to verify.
    for key, cached_pathway in cached_pathways.items():
        expected = expected_pathways[key]
        # cached pathways store just program uuids instead of the full programs, transform before comparing
        expected['program_uuids'] = [program['uuid'] for program in expected['programs']]
        del expected['programs']
        self.assertEqual(cached_pathway, expected)
def test_handle_programs(self):
    """
    Verify that the command requests and caches program UUIDs and details.
    """
    # Ideally, this user would be created in the test setup and deleted in
    # the one test case which covers the case where the user is missing. However,
    # that deletion causes "OperationalError: no such table: wiki_attachmentrevision"
    # when run on Jenkins.
    UserFactory(username=self.catalog_integration.service_username)

    expected_programs = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
        for program in self.programs
    }

    self.mock_list()
    self.mock_pathways(self.pathways)
    for uuid in self.uuids:
        self.mock_detail(uuid, expected_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    cached_uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site_domain))
    self.assertEqual(set(cached_uuids), set(self.uuids))

    cached_programs = cache.get_many(list(expected_programs))
    # Verify that the keys were all cache hits.
    self.assertEqual(set(cached_programs), set(expected_programs))

    # The values are dictionaries and therefore unhashable, so a set
    # comparison is not possible. Every program is already known to have come
    # out of the cache, so only the data itself is left to verify.
    for key, cached_program in cached_programs.items():
        # cached programs have a pathways field added to them, remove before comparing
        del cached_program['pathway_ids']
        self.assertEqual(cached_program, expected_programs[key])
def test_handle_missing_programs(self):
    """
    Verify that a problem retrieving a program doesn't prevent the command from
    retrieving and caching other programs, but does cause it to exit with a
    non-zero exit code.
    """
    UserFactory(username=self.catalog_integration.service_username)

    def keyed(program_list):
        # Index the given programs by their cache key.
        return {
            PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
            for program in program_list
        }

    all_programs = keyed(self.programs)
    partial_programs = keyed(self.programs[:2])

    self.mock_list()
    # Only the first two programs get a detail mock.
    for uuid in self.uuids[:2]:
        self.mock_detail(uuid, partial_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    with self.assertRaises(SystemExit) as context:
        call_command('cache_programs')
    self.assertEqual(context.exception.code, 1)

    cached_uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site_domain))
    self.assertEqual(set(cached_uuids), set(self.uuids))

    cached_programs = cache.get_many(list(all_programs))
    # One of the cache keys should result in a cache miss.
    self.assertEqual(set(cached_programs), set(partial_programs))

    for key, cached_program in cached_programs.items():
        # cached programs have a pathways field added to them, remove before comparing
        del cached_program['pathway_ids']
        self.assertEqual(cached_program, partial_programs[key])
def setup_catalog_cache(self, program_uuid, organization_key):
    """
    Helper that caches a program with the given uuid whose only authoring
    organization is a catalog organization created with ``organization_key``.
    """
    authoring_org = CatalogOrganizationFactory.create(key=organization_key)
    cached_program = ProgramFactory.create(
        uuid=program_uuid,
        authoring_organizations=[authoring_org],
    )
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)
    cache.set(cache_key, cached_program, None)
def test_handle_missing_pathways(self):
    """
    Verify that the command raises an exception when it fails to retrieve pathways.
    """
    UserFactory(username=self.catalog_integration.service_username)

    programs_by_key = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
        for program in self.programs
    }

    # Program list and details are mocked; pathways are not.
    self.mock_list()
    for uuid in self.uuids:
        self.mock_detail(uuid, programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    with self.assertRaises(SystemExit) as context:
        call_command('cache_programs')
    self.assertEqual(context.exception.code, 1)

    pathway_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    cached_pathways = cache.get(pathway_ids_key)
    self.assertEqual(cached_pathways, [])
def fetch_program_details(self, client, uuids):
    """
    Request the details of each program in ``uuids`` from ``client``.

    A program whose request fails is logged and skipped so that the remaining
    programs are still fetched.

    Arguments:
        client: API client; ``client.programs(uuid).get(exclude_utm=1)`` must
            return the program details.
        uuids (iterable): identifiers of the programs to request.

    Returns:
        tuple: ``(programs, failure)`` where ``programs`` maps program cache
        keys to the fetched details and ``failure`` is True if any request
        raised.
    """
    programs = {}
    failure = False
    for uuid in uuids:
        try:
            cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
            logger.info('Requesting details for program {uuid}.'.format(uuid=uuid))
            program = client.programs(uuid).get(exclude_utm=1)
            programs[cache_key] = program
        # Only ordinary errors are treated as a failed fetch; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except Exception:  # pylint: disable=broad-except
            logger.exception('Failed to retrieve details for program {uuid}.'.format(uuid=uuid))
            failure = True
    return programs, failure
def test_handle_missing_pathways(self):
    """
    Verify that the command raises an exception when it fails to retrieve pathways.
    """
    UserFactory(username=self.catalog_integration.service_username)

    programs_by_key = {}
    for program in self.programs:
        programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

    # Program list and details are mocked; pathways are not.
    self.mock_list()
    for uuid in self.uuids:
        detail = programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)]
        self.mock_detail(uuid, detail)

    with self.assertRaises(SystemExit) as context:
        call_command('cache_programs')
    self.assertEqual(context.exception.code, 1)

    pathway_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    self.assertEqual(cache.get(pathway_ids_key), [])
def test_get_many_with_missing(self, mock_cache, mock_warning, mock_info):
    """
    Verify that programs missing from the first get_many are recovered by the
    retry when the site is configured with a catalog API URL.
    """
    programs = ProgramFactory.create_batch(3)
    last_program = programs[-1]

    all_programs = {}
    for program in programs:
        all_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

    partial_programs = {}
    for program in programs[:2]:
        partial_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])] = program

    def fake_get_many(keys):
        # Requests for anything other than a single key only see the first two
        # programs; a single-key request yields the third one.
        if len(keys) != 1:
            return partial_programs
        return {PROGRAM_CACHE_KEY_TPL.format(uuid=last_program['uuid']): last_program}

    mock_cache.get.return_value = [program['uuid'] for program in programs]
    mock_cache.get_many.side_effect = fake_get_many

    site_configuration = {'COURSE_CATALOG_API_URL': 'foo'}
    with with_site_configuration_context(domain=self.site.name, configuration=site_configuration):
        actual_programs = get_programs(site=self.site)

    # All 3 cached programs should be returned. An info message should be
    # logged about the one that was initially missing, but the code should
    # be able to stitch together all the details.
    actual_uuids = set(program['uuid'] for program in actual_programs)
    expected_uuids = set(program['uuid'] for program in all_programs.values())
    self.assertEqual(actual_uuids, expected_uuids)
    self.assertFalse(mock_warning.called)
    mock_info.assert_called_with('Failed to get details for 1 programs. Retrying.')

    for program in actual_programs:
        expected = all_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])]
        self.assertEqual(program, expected)
def test_get_one(self, mock_warning, _mock_info):
    """
    Verify that a single program lookup warns and returns None on a cache miss,
    and returns the program without warning once it is cached.
    """
    expected_program = ProgramFactory()
    expected_uuid = expected_program['uuid']
    expected_warning = 'Failed to get details for program {uuid} from the cache.'.format(
        uuid=expected_uuid
    )

    # Nothing cached yet: expect a miss and exactly one warning.
    self.assertEqual(get_programs(self.site, uuid=expected_uuid), None)
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Once cached, the program comes back and nothing is logged.
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_uuid)
    cache.set(cache_key, expected_program, None)

    actual_program = get_programs(self.site, uuid=expected_uuid)
    self.assertEqual(actual_program, expected_program)
    self.assertFalse(mock_warning.called)
def fetch_program_details(self, client, uuids):
    """
    Request the details of each program in ``uuids`` from ``client``.

    Every fetched program is given an empty ``pathway_ids`` list. A program
    whose request fails is logged and skipped so that the remaining programs
    are still fetched.

    Arguments:
        client: API client; ``client.programs(uuid).get(exclude_utm=1)`` must
            return the program details as a mutable mapping.
        uuids (iterable): identifiers of the programs to request.

    Returns:
        tuple: ``(programs, failure)`` where ``programs`` maps program cache
        keys to the fetched details and ``failure`` is True if any request
        raised.
    """
    programs = {}
    failure = False
    for uuid in uuids:
        try:
            cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
            logger.info(u'Requesting details for program {uuid}.'.format(uuid=uuid))
            program = client.programs(uuid).get(exclude_utm=1)
            # pathways get added in process_pathways
            program['pathway_ids'] = []
            programs[cache_key] = program
        # Only ordinary errors are treated as a failed fetch; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except Exception:  # pylint: disable=broad-except
            logger.exception(u'Failed to retrieve details for program {uuid}.'.format(uuid=uuid))
            failure = True
    return programs, failure
def test_get_one(self, mock_warning):
    """
    Verify that a single program lookup warns and returns None on a cache miss,
    and returns the program without warning once it is cached.
    """
    expected_program = ProgramFactory()
    expected_uuid = expected_program['uuid']
    expected_warning = 'Details for program {uuid} are not cached.'.format(uuid=expected_uuid)

    # Nothing cached yet: expect a miss and exactly one warning.
    self.assertEqual(get_programs(uuid=expected_uuid), None)
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Once cached, the program comes back and nothing is logged.
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_uuid)
    cache.set(cache_key, expected_program, None)

    actual_program = get_programs(uuid=expected_uuid)
    self.assertEqual(actual_program, expected_program)
    self.assertFalse(mock_warning.called)
def test_get_one(self, mock_warning, _mock_info):
    """
    Verify that a single program lookup warns and returns None on a cache miss,
    and returns the program without warning once it is cached.
    """
    expected_program = ProgramFactory()
    expected_uuid = expected_program['uuid']
    expected_warning = f'Failed to get details for program {expected_uuid} from the cache.'

    # Nothing cached yet: expect a miss and exactly one warning.
    missed = get_programs(uuid=expected_uuid)
    assert missed is None
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Once cached, the program comes back and nothing is logged.
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_uuid)
    cache.set(cache_key, expected_program, None)

    actual_program = get_programs(uuid=expected_uuid)
    assert actual_program == expected_program
    assert not mock_warning.called
def test_catalog_program_missing_org(self):
    """
    Test OrganizationDoesNotExistException is thrown if the cached program does not
    have an authoring organization.
    """
    # Cache a program that has no authoring organizations at all.
    orgless_program = ProgramFactory.create(
        uuid=self.program_uuid,
        authoring_organizations=[],
    )
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=self.program_uuid)
    cache.set(cache_key, orgless_program, None)

    # Set up the organization, its SAML provider and the user's social auth entry.
    organization = OrganizationFactory.create(short_name=self.organization_key)
    saml_provider = SAMLProviderConfigFactory.create(organization=organization)
    self.create_social_auth_entry(self.user, saml_provider, self.external_user_id)

    with pytest.raises(OrganizationDoesNotExistException):
        get_user_by_program_id(self.external_user_id, self.program_uuid)
def test_catalog_program_missing_org(self):
    """
    Test OrganizationDoesNotExistException is thrown if the cached program does not
    have an authoring organization.
    """
    # Cache a program that has no authoring organizations at all.
    orgless_program = ProgramFactory.create(
        uuid=self.program_uuid,
        authoring_organizations=[],
    )
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=self.program_uuid)
    cache.set(cache_key, orgless_program, None)

    # Set up the organization, its SAML provider and the user's social auth entry.
    organization = OrganizationFactory.create(short_name=self.organization_key)
    saml_provider = SAMLProviderConfigFactory.create(organization=organization)
    self.create_social_auth_entry(self.user, saml_provider, self.external_user_id)

    with pytest.raises(OrganizationDoesNotExistException):
        get_user_by_program_id(self.external_user_id, self.program_uuid)
def test_get_from_course(self, mock_warning, _mock_info):
    """
    Verify that programs can be looked up by course run: an empty list before
    anything is cached, the program afterwards, with no warning logged.
    """
    expected_program = ProgramFactory()
    first_course_run = expected_program['courses'][0]['course_runs'][0]
    expected_course = first_course_run['key']

    # Nothing cached for the course run yet.
    self.assertEqual(get_programs(course=expected_course), [])

    course_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=expected_course)
    cache.set(course_key, [expected_program['uuid']], None)
    program_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_program['uuid'])
    cache.set(program_key, expected_program, None)

    actual_program = get_programs(course=expected_course)
    self.assertEqual(actual_program, [expected_program])
    self.assertFalse(mock_warning.called)
def test_get_from_catalog_course(self, mock_warning, _mock_info):
    """
    Verify that programs can be looked up by catalog course uuid: an empty list
    before anything is cached, the program afterwards, with no warning logged.
    """
    expected_program = ProgramFactory()
    expected_catalog_course = expected_program['courses'][0]
    course_uuid = expected_catalog_course['uuid']

    # Nothing cached for the catalog course yet.
    assert get_programs(catalog_course_uuid=course_uuid) == []

    course_key = CATALOG_COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_uuid=course_uuid)
    cache.set(course_key, [expected_program['uuid']], None)
    program_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_program['uuid'])
    cache.set(program_key, expected_program, None)

    actual_program = get_programs(catalog_course_uuid=course_uuid)
    assert actual_program == [expected_program]
    assert not mock_warning.called
def test_get_one(self, mock_warning, _mock_info):
    """
    Verify that a single program lookup warns and returns None on a cache miss,
    and returns the program without warning once it is cached.
    """
    expected_program = ProgramFactory()
    expected_uuid = expected_program['uuid']
    expected_warning = 'Failed to get details for program {uuid} from the cache.'.format(
        uuid=expected_uuid
    )

    # Nothing cached yet: expect a miss and exactly one warning.
    self.assertEqual(get_programs(self.site, uuid=expected_uuid), None)
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Once cached, the program comes back and nothing is logged.
    cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_uuid)
    cache.set(cache_key, expected_program, None)

    actual_program = get_programs(self.site, uuid=expected_uuid)
    self.assertEqual(actual_program, expected_program)
    self.assertFalse(mock_warning.called)
def fetch_program_details(self, client, uuids, api_base_url):
    """
    Request the details of each program in ``uuids`` over HTTP.

    Every fetched program is given an empty ``pathway_ids`` list. A program
    whose request fails, including a response rejected by
    ``raise_for_status()``, is logged and skipped so that the remaining
    programs are still fetched.

    Arguments:
        client: HTTP client; ``client.get(url, params=...)`` must return a
            response offering ``raise_for_status()`` and ``json()``
            (presumably a requests-style session; confirm against the caller).
        uuids (iterable): identifiers of the programs to request.
        api_base_url (str): base URL that ``programs/<uuid>/`` is joined onto.

    Returns:
        tuple: ``(programs, failure)`` where ``programs`` maps program cache
        keys to the decoded details and ``failure`` is True if any request
        raised.
    """
    programs = {}
    failure = False
    for uuid in uuids:
        try:
            cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
            logger.info(f'Requesting details for program {uuid}.')
            # The trailing slash on the base keeps urljoin from replacing its
            # last path segment.
            api_url = urljoin(f"{api_base_url}/", f"programs/{uuid}/")
            response = client.get(api_url, params={"exclude_utm": 1})
            response.raise_for_status()
            program = response.json()
            # pathways get added in process_pathways
            program['pathway_ids'] = []
            programs[cache_key] = program
        # Only ordinary errors are treated as a failed fetch; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except Exception:  # pylint: disable=broad-except
            logger.exception(f'Failed to retrieve details for program {uuid}.')
            failure = True
    return programs, failure
def test_get_from_course(self, mock_warning, _mock_info):
    """
    Verify that programs can be looked up by course run: an empty list before
    anything is cached, the program afterwards, with no warning logged.
    """
    expected_program = ProgramFactory()
    first_course_run = expected_program['courses'][0]['course_runs'][0]
    expected_course = first_course_run['key']

    # Nothing cached for the course run yet.
    self.assertEqual(get_programs(course=expected_course), [])

    course_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=expected_course)
    cache.set(course_key, [expected_program['uuid']], None)
    program_key = PROGRAM_CACHE_KEY_TPL.format(uuid=expected_program['uuid'])
    cache.set(program_key, expected_program, None)

    actual_program = get_programs(course=expected_course)
    self.assertEqual(actual_program, [expected_program])
    self.assertFalse(mock_warning.called)
def get_programs(site=None, uuid=None, uuids=None, course=None):  # pylint: disable=redefined-outer-name
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying a specific programs to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied = sum(1 for arg in (site, uuid, uuids, course) if arg is not None)
    if supplied != 1:
        raise TypeError('get_programs takes exactly one argument')

    # Single-program lookup: return whatever the cache holds, warning on a miss.
    if uuid:
        program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return program

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )

    return get_programs_by_uuids(uuids)
def _create_cached_program(self):
    """
    Helper that builds a MicroBachelors program containing a course for each of
    ``self.course_keys``, caches it and returns it.

    The program uuid is also cached under every course's catalog course key
    and under the key of each course's first course run.
    """
    program = ProgramFactory.create()
    program['courses'].extend(CourseFactory(id=course_key) for course_key in self.course_keys)
    program['type'] = 'MicroBachelors'
    program['type_attrs']['coaching_supported'] = True
    program_uuid = program['uuid']

    for course in program['courses']:
        catalog_course_key = CATALOG_COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_uuid=course['uuid'])
        cache.set(catalog_course_key, [program_uuid], None)

        first_run_id = course['course_runs'][0]['key']
        course_run_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=first_run_id)
        cache.set(course_run_key, [program_uuid], None)

    cache.set(PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid), program, None)
    return program
def init_cache(self):
    """
    This function plays the role of the ``cache_programs`` management command.

    It caches every fixture program under its program key, then caches lists
    of program uuids grouped by (site, program type) and by (site, program
    type slug).
    """
    all_programs = [
        self.masters_program_1,
        self.masters_program_2,
        self.bachelors_program,
        self.no_type_program,
        self.masters_program_other_site,
    ]

    cache.set_many(
        {PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program for program in all_programs},
        None,
    )

    programs_by_type = defaultdict(list)
    programs_by_type_slug = defaultdict(list)
    for program in all_programs:
        # Only the "other site" program is filed under the other site's id.
        on_other_site = program == self.masters_program_other_site
        site_id = self.other_site.id if on_other_site else self.site.id

        type_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
            site_id=site_id,
            program_type=normalize_program_type(program.get('type')),
        )
        slug_key = PROGRAMS_BY_TYPE_SLUG_CACHE_KEY_TPL.format(
            site_id=site_id,
            program_slug=(program.get('type_attrs') or {}).get('slug'),
        )
        programs_by_type[type_key].append(program['uuid'])
        programs_by_type_slug[slug_key].append(program['uuid'])

    cache.set_many(programs_by_type, None)
    cache.set_many(programs_by_type_slug, None)
def get_programs(site=None, uuid=None, uuids=None, course=None):  # pylint: disable=redefined-outer-name
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying a specific programs to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied = sum(1 for arg in (site, uuid, uuids, course) if arg is not None)
    if supplied != 1:
        raise TypeError('get_programs takes exactly one argument')

    # Single-program lookup: return whatever the cache holds, warning on a miss.
    if uuid:
        program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return program

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )

    return get_programs_by_uuids(uuids)
def get_programs(site=None, uuid=None, course=None):  # pylint: disable=redefined-outer-name
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied = sum(1 for arg in (site, uuid, course) if arg is not None)
    if supplied != 1:
        raise TypeError('get_programs takes exactly one argument')

    missing_details_msg_tpl = u'Failed to get details for program {uuid} from the cache.'

    # Single-program lookup: return whatever the cache holds, warning on a miss.
    if uuid:
        single = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not single:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return single

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    else:
        uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )
    wanted = set(uuids)

    first_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in uuids])
    programs = list(first_pass.values())

    # get_many occasionally fails to return details held on one or more
    # Memcached nodes. The keys do not appear to be evicted: roughly 1% of the
    # time every key stored on the affected node(s) is absent from the result,
    # yet a get_many issued immediately afterwards returns all of them. Asking
    # again for just the missing keys mitigates this. Splitting the get_many
    # into smaller chunks may also help.
    missing_uuids = wanted - {program['uuid'] for program in programs}
    if not missing_uuids:
        return programs

    logger.info(
        u'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
    )
    second_pass = cache.get_many([PROGRAM_CACHE_KEY_TPL.format(uuid=item) for item in missing_uuids])
    programs.extend(second_pass.values())

    # Anything absent after the retry is reported individually.
    for lost in wanted - {program['uuid'] for program in programs}:
        logger.warning(missing_details_msg_tpl.format(uuid=lost))

    return programs
def test_handle_programs(self):
    """
    Verify that the command requests and caches program UUIDs and details.
    """
    # Ideally, this user would be created in the test setup and deleted in
    # the one test case which covers the case where the user is missing. However,
    # that deletion causes "OperationalError: no such table: wiki_attachmentrevision"
    # when run on Jenkins.
    UserFactory(username=self.catalog_integration.service_username)

    expected_programs = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid']): program
        for program in self.programs
    }

    self.mock_list()
    self.mock_pathways(self.pathways)
    for uuid in self.uuids:
        self.mock_detail(uuid, expected_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    cached_uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site_domain))
    self.assertEqual(set(cached_uuids), set(self.uuids))

    cached_programs = cache.get_many(list(expected_programs))
    # Verify that the keys were all cache hits.
    self.assertEqual(set(cached_programs), set(expected_programs))

    # The values are dictionaries and therefore unhashable, so a set
    # comparison is not possible. Every program is already known to have come
    # out of the cache, so only the data itself is left to verify.
    for key, cached_program in cached_programs.items():
        # cached programs have a pathways field added to them, remove before comparing
        del cached_program['pathway_ids']
        self.assertEqual(cached_program, expected_programs[key])

    # the courses in the child program's first curriculum (the active one)
    # should point to both the child program and the first program
    # in the cache.
    active_curriculum = self.child_program['curricula'][0]
    for course in active_curriculum['courses']:
        for course_run in course['course_runs']:
            course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run['key'])
            for expected_uuid in (self.programs[0]['uuid'], self.child_program['uuid']):
                self.assertIn(expected_uuid, cache.get(course_run_cache_key))

    # for each program, assert that the program's UUID is in a cached list of
    # program UUIDS by program type and a cached list of UUIDs by authoring organization
    for program in self.programs:
        program_uuid = program['uuid']
        type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
            site_id=self.site.id,
            program_type=normalize_program_type(program.get('type', 'None')),
        )
        self.assertIn(program_uuid, cache.get(type_cache_key))

        for organization in program['authoring_organizations']:
            organization_cache_key = PROGRAMS_BY_ORGANIZATION_CACHE_KEY_TPL.format(
                org_key=organization['key']
            )
            self.assertIn(program_uuid, cache.get(organization_cache_key))
def get_programs(site=None, uuid=None, uuids=None, course=None, catalog_course_uuid=None, organization=None):
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.

    Exactly one keyword argument must be supplied; it selects how the set of
    program UUIDs is looked up.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object to fetch programs of.
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying specific programs to read from the cache.
        course (string): course run id identifying a specific course run to read from the cache.
        catalog_course_uuid (string): Catalog Course UUID
        organization (string): short name for specific organization to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied_count = sum(
        1
        for candidate in (site, uuid, uuids, course, catalog_course_uuid, organization)
        if candidate is not None
    )
    if supplied_count != 1:
        raise TypeError('get_programs takes exactly one argument')

    # NOTE(review): validation counts non-None arguments, but the dispatch
    # below is by truthiness, so a falsy non-None value (e.g. uuid='') matches
    # no branch and reaches get_programs_by_uuids with uuids still None.

    # Single-program lookup: the only path that returns a dict instead of a list.
    if uuid:
        details = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not details:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return details

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif catalog_course_uuid:
        uuids = cache.get(CATALOG_COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_uuid=catalog_course_uuid))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        # Without a site configuration that provides COURSE_CATALOG_API_URL
        # the site is treated as having no programs (and nothing is logged).
        uuids = []
        site_config = getattr(site, 'configuration', None)
        if site_config and site_config.get_value('COURSE_CATALOG_API_URL'):
            uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
            if not uuids:
                logger.warning(f'Failed to get program UUIDs from the cache for site {site.domain}.')
    elif organization:
        uuids = get_programs_for_organization(organization)
        if not uuids:
            return []

    return get_programs_by_uuids(uuids)
def test_get_many(self, mock_warning, mock_info):
    """
    Exercise get_programs(site=...) through three cache states: no UUID list
    cached, UUID list cached with one program's details missing, and
    everything cached.
    """
    def by_cache_key(batch):
        # Index program dicts by the cache key their details are stored under.
        return {PROGRAM_CACHE_KEY_TPL.format(uuid=item['uuid']): item for item in batch}

    def uuid_set(batch):
        return {item['uuid'] for item in batch}

    programs = ProgramFactory.create_batch(3)

    # Cache details for 2 of 3 programs.
    partial_programs = by_cache_key(programs[:2])
    cache.set_many(partial_programs, None)

    # When called before UUIDs are cached, the function should return an
    # empty list and log a warning.
    site_context = with_site_configuration_context(
        domain=self.site.name,
        configuration={'COURSE_CATALOG_API_URL': 'foo'},
    )
    with site_context:
        assert get_programs(site=self.site) == []
        mock_warning.assert_called_once_with(
            f'Failed to get program UUIDs from the cache for site {self.site.domain}.'
        )
        mock_warning.reset_mock()

    # Cache UUIDs for all 3 programs.
    site_uuids_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site.domain)
    cache.set(site_uuids_key, [program['uuid'] for program in programs], None)

    actual_programs = get_programs(site=self.site)

    # The 2 cached programs should be returned while info and warning
    # messages should be logged for the missing one.
    assert uuid_set(actual_programs) == uuid_set(partial_programs.values())
    mock_info.assert_called_with('Failed to get details for 1 programs. Retrying.')
    mock_warning.assert_called_with(
        'Failed to get details for program {uuid} from the cache.'.format(uuid=programs[2]['uuid'])
    )
    mock_warning.reset_mock()

    # The values are unhashable dictionaries, so they cannot be compared as
    # sets. Membership was verified above; here each payload is checked
    # against what was cached.
    for fetched in actual_programs:
        assert fetched == partial_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=fetched['uuid'])]

    # Cache details for all 3 programs.
    all_programs = by_cache_key(programs)
    cache.set_many(all_programs, None)

    actual_programs = get_programs(site=self.site)

    # All 3 programs should be returned.
    assert uuid_set(actual_programs) == uuid_set(all_programs.values())
    assert not mock_warning.called

    for fetched in actual_programs:
        assert fetched == all_programs[PROGRAM_CACHE_KEY_TPL.format(uuid=fetched['uuid'])]
def fake_get_many(keys):
    """
    Stand-in for ``cache.get_many``.

    A request for anything other than exactly one key gets the partial
    mapping; a single-key request gets the entry for the last program.
    Relies on ``programs`` and ``partial_programs`` from the enclosing scope.
    """
    if len(keys) != 1:
        return partial_programs

    last_program = programs[-1]
    return {PROGRAM_CACHE_KEY_TPL.format(uuid=last_program['uuid']): last_program}
def _create_catalog_program(self, catalog_org):
    """
    Build a program authored by ``catalog_org``, store it in the cache under
    its program key, and return it.
    """
    catalog_program = ProgramFactory.create(authoring_organizations=[catalog_org])
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=catalog_program['uuid'])
    cache.set(program_cache_key, catalog_program, None)
    return catalog_program
def fake_get_many(keys):
    """
    Replacement for ``cache.get_many``: the multi-key call returns only the
    partial mapping, while a single-key call returns the last program.
    ``programs`` and ``partial_programs`` come from the enclosing scope.
    """
    single_key_request = len(keys) == 1
    return (
        {PROGRAM_CACHE_KEY_TPL.format(uuid=programs[-1]['uuid']): programs[-1]}
        if single_key_request
        else partial_programs
    )
def get_programs(site=None, uuid=None, course=None):  # pylint: disable=redefined-outer-name
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.

    Exactly one of the keyword arguments must be given.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    provided = [value for value in (site, uuid, course) if value is not None]
    if len(provided) != 1:
        raise TypeError('get_programs takes exactly one argument')

    missing_details_msg_tpl = u'Failed to get details for program {uuid} from the cache.'

    # Single-program lookup returns the cached dict (or the falsy miss value).
    if uuid:
        single_program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not single_program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return single_program

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    else:
        uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
        if not uuids:
            logger.warning(u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain))

    detail_keys = [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in uuids]
    programs = list(cache.get_many(detail_keys).values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_uuids = set(uuids) - {found['uuid'] for found in programs}
    if not missing_uuids:
        return programs

    logger.info(
        u'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
    )

    retry_keys = [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in missing_uuids]
    programs.extend(cache.get_many(retry_keys).values())

    # Anything absent after the single retry is reported and left out.
    still_missing_uuids = set(uuids) - {found['uuid'] for found in programs}
    for program_uuid in still_missing_uuids:
        logger.warning(missing_details_msg_tpl.format(uuid=program_uuid))

    return programs
def setUp(self):
    """Run the parent setUp, then cache ``self.program`` under its program key."""
    super(EnrollmentTestMixin, self).setUp()
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=self.program_uuid)
    # timeout=None: presumably "never expire" (Django cache semantics) - confirm.
    cache.set(program_cache_key, self.program, None)
def get_programs(uuid=None):
    """Read programs from the cache.

    The cache is populated by a management command, cache_programs.

    When the ``read_cached_programs`` waffle switch is off, the older
    implementation is used instead, which calls ``get_edx_api_data``.

    Keyword Arguments:
        uuid (string): UUID identifying a specific program to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.
    """
    if waffle.switch_is_active('read_cached_programs'):
        missing_details_msg_tpl = 'Details for program {uuid} are not cached.'

        if uuid:
            cached_program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
            if not cached_program:
                logger.warning(missing_details_msg_tpl.format(uuid=uuid))
            return cached_program

        uuids = cache.get(PROGRAM_UUIDS_CACHE_KEY, [])
        if not uuids:
            logger.warning('Program UUIDs are not cached.')

        detail_keys = [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in uuids]
        programs = list(cache.get_many(detail_keys).values())

        # Report every UUID whose details did not come back from the cache.
        missing_uuids = set(uuids) - {found['uuid'] for found in programs}
        for program_uuid in missing_uuids:
            logger.warning(missing_details_msg_tpl.format(uuid=program_uuid))

        return programs

    # Old implementation which may request programs in-process. To be removed
    # as part of LEARNER-382.
    catalog_integration = CatalogIntegration.current()
    if not catalog_integration.enabled:
        return []

    try:
        user = User.objects.get(username=catalog_integration.service_username)
    except User.DoesNotExist:
        return []

    api = create_catalog_api_client(user, catalog_integration)

    programs_cache_key = '{base}.programs'.format(base=catalog_integration.CACHE_KEY)

    querystring = {
        'exclude_utm': 1,
        'status': ('active', 'retired',),
    }
    if uuid:
        querystring['use_full_course_serializer'] = 1

    # The cache key is only passed along when the integration enables caching.
    if not catalog_integration.is_cache_enabled:
        programs_cache_key = None

    return get_edx_api_data(
        catalog_integration,
        'programs',
        api=api,
        resource_id=uuid,
        cache_key=programs_cache_key,
        querystring=querystring,
    )
def test_get_many(self, mock_warning, mock_info):
    """
    Walk get_programs(site) through three cache states: no UUID list cached,
    UUID list cached with one program's details missing, and all cached.
    """
    def detail_key(program):
        # Cache key under which a program's details are stored.
        return PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])

    programs = ProgramFactory.create_batch(3)

    # Cache details for 2 of 3 programs.
    partial_programs = {detail_key(entry): entry for entry in programs[:2]}
    cache.set_many(partial_programs, None)

    # When called before UUIDs are cached, the function should return an
    # empty list and log a warning.
    self.assertEqual(get_programs(self.site), [])
    mock_warning.assert_called_once_with('Failed to get program UUIDs from the cache.')
    mock_warning.reset_mock()

    # Cache UUIDs for all 3 programs.
    site_uuids_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site.domain)
    cache.set(site_uuids_key, [entry['uuid'] for entry in programs], None)

    actual_programs = get_programs(self.site)

    # The 2 cached programs should be returned while info and warning
    # messages should be logged for the missing one.
    returned_uuids = set(entry['uuid'] for entry in actual_programs)
    partially_cached_uuids = set(entry['uuid'] for entry in partial_programs.values())
    self.assertEqual(returned_uuids, partially_cached_uuids)
    mock_info.assert_called_with('Failed to get details for 1 programs. Retrying.')
    mock_warning.assert_called_with(
        'Failed to get details for program {uuid} from the cache.'.format(uuid=programs[2]['uuid'])
    )
    mock_warning.reset_mock()

    # Dictionaries are unhashable, so the values cannot be compared as a
    # set. The UUID check above established which programs were returned;
    # this loop verifies the data of each one.
    for fetched in actual_programs:
        self.assertEqual(fetched, partial_programs[detail_key(fetched)])

    # Cache details for all 3 programs.
    all_programs = {detail_key(entry): entry for entry in programs}
    cache.set_many(all_programs, None)

    actual_programs = get_programs(self.site)

    # All 3 programs should be returned.
    returned_uuids = set(entry['uuid'] for entry in actual_programs)
    fully_cached_uuids = set(entry['uuid'] for entry in all_programs.values())
    self.assertEqual(returned_uuids, fully_cached_uuids)
    self.assertFalse(mock_warning.called)

    for fetched in actual_programs:
        self.assertEqual(fetched, all_programs[detail_key(fetched)])
def handle(self, *args, **options):
    """
    Fetch program UUIDs and details and store them in the cache.

    With the ``populate-multitenant-programs`` waffle switch on, every Site
    whose configuration provides ``COURSE_CATALOG_API_URL`` is processed and
    its UUID list is cached under a per-site key. With the switch off, a
    single client is used and the UUID list is cached under
    ``PROGRAM_UUIDS_CACHE_KEY``. In both modes the details of all
    successfully fetched programs are cached with one ``set_many`` call.

    Raises:
        User.DoesNotExist: if the catalog integration's service user is
            missing (logged, then re-raised).
        SystemExit: exit status 1 if any UUID or detail request failed, after
            whatever was fetched has been cached.
    """
    if waffle.switch_is_active('populate-multitenant-programs'):
        failure = False
        logger.info('populate-multitenant-programs switch is ON')

        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            # The placeholder is named, so the value must be passed by keyword;
            # a positional argument raises KeyError and masks DoesNotExist.
            logger.error(
                'Failed to create API client. Service user {username} does not exist.'.format(username=username)
            )
            raise

        programs = {}
        for site in Site.objects.all():
            site_config = getattr(site, 'configuration', None)
            if site_config is None or not site_config.get_value('COURSE_CATALOG_API_URL'):
                logger.info('Skipping site {domain}. No configuration.'.format(domain=site.domain))
                continue

            client = create_catalog_api_client(user, site=site)
            # Both helpers return (result, failed_flag).
            uuids, program_uuids_failed = self.get_site_program_uuids(client, site)
            new_programs, program_details_failed = self.fetch_program_details(client, uuids)

            if program_uuids_failed or program_details_failed:
                failure = True

            programs.update(new_programs)

            logger.info('Caching UUIDs for {total} programs for site {site_name}.'.format(
                total=len(uuids),
                site_name=site.domain,
            ))
            cache.set(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), uuids, None)

        successful = len(programs)
        logger.info('Caching details for {successful} programs.'.format(successful=successful))
        cache.set_many(programs, None)

        if failure:
            # This will fail a Jenkins job running this command, letting site
            # operators know that there was a problem.
            sys.exit(1)

    else:
        catalog_integration = CatalogIntegration.current()
        username = catalog_integration.service_username

        try:
            user = User.objects.get(username=username)
            client = create_catalog_api_client(user)
        except User.DoesNotExist:
            # Keyword argument required for the named placeholder (see above).
            logger.error(
                'Failed to create API client. Service user {username} does not exist.'.format(username=username)
            )
            raise

        try:
            querystring = {
                'exclude_utm': 1,
                'status': ('active', 'retired'),
                'uuids_only': 1,
            }

            logger.info('Requesting program UUIDs.')
            uuids = client.programs.get(**querystring)
        except:  # pylint: disable=bare-except
            # Nothing is swallowed here: the error is logged and re-raised.
            logger.error('Failed to retrieve program UUIDs.')
            raise

        total = len(uuids)
        logger.info('Received {total} UUIDs.'.format(total=total))

        programs = {}
        failure = False
        for uuid in uuids:
            try:
                logger.info('Requesting details for program {uuid}.'.format(uuid=uuid))
                program = client.programs(uuid).get(exclude_utm=1)

                cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
                programs[cache_key] = program
            except Exception:  # pylint: disable=broad-except
                # Best effort per program: record the failure and keep going.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit still stop the command.
                logger.exception('Failed to retrieve details for program {uuid}.'.format(uuid=uuid))
                failure = True

                continue

        successful = len(programs)
        logger.info('Caching details for {successful} programs.'.format(successful=successful))
        cache.set_many(programs, None)

        logger.info('Caching UUIDs for {total} programs.'.format(total=total))
        cache.set(PROGRAM_UUIDS_CACHE_KEY, uuids, None)

        if failure:
            # This will fail a Jenkins job running this command, letting site
            # operators know that there was a problem.
            sys.exit(1)
def setUp(self):
    """Run the parent setUp, then cache ``self.program`` under its program key."""
    super().setUp()
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=self.program_uuid)
    # timeout=None: presumably "never expire" (Django cache semantics) - confirm.
    cache.set(program_cache_key, self.program, None)
def test_get_many(self, mock_warning, mock_info):
    """
    Check get_programs(site) against an empty UUID cache, a cache missing
    one program's details, and a fully populated cache.
    """
    programs = ProgramFactory.create_batch(3)
    keys_in_order = [PROGRAM_CACHE_KEY_TPL.format(uuid=item['uuid']) for item in programs]

    # Cache details for 2 of 3 programs.
    partial_programs = dict(zip(keys_in_order[:2], programs[:2]))
    cache.set_many(partial_programs, None)

    # When called before UUIDs are cached, the function should return an
    # empty list and log a warning.
    self.assertEqual(get_programs(self.site), [])
    mock_warning.assert_called_once_with('Failed to get program UUIDs from the cache.')
    mock_warning.reset_mock()

    # Cache UUIDs for all 3 programs.
    cache.set(
        SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site.domain),
        [item['uuid'] for item in programs],
        None,
    )

    actual_programs = get_programs(self.site)

    # The 2 cached programs should be returned while info and warning
    # messages should be logged for the missing one.
    self.assertEqual(
        set(item['uuid'] for item in actual_programs),
        set(item['uuid'] for item in partial_programs.values()),
    )
    mock_info.assert_called_with('Failed to get details for 1 programs. Retrying.')
    missing_program_uuid = programs[2]['uuid']
    mock_warning.assert_called_with(
        'Failed to get details for program {uuid} from the cache.'.format(uuid=missing_program_uuid)
    )
    mock_warning.reset_mock()

    # The returned values are dictionaries (unhashable), which rules out a
    # set comparison. Which programs came back was verified above, so only
    # the accuracy of each payload is checked here.
    for item in actual_programs:
        item_key = PROGRAM_CACHE_KEY_TPL.format(uuid=item['uuid'])
        self.assertEqual(item, partial_programs[item_key])

    # Cache details for all 3 programs.
    all_programs = dict(zip(keys_in_order, programs))
    cache.set_many(all_programs, None)

    actual_programs = get_programs(self.site)

    # All 3 programs should be returned.
    self.assertEqual(
        set(item['uuid'] for item in actual_programs),
        set(item['uuid'] for item in all_programs.values()),
    )
    self.assertFalse(mock_warning.called)

    for item in actual_programs:
        item_key = PROGRAM_CACHE_KEY_TPL.format(uuid=item['uuid'])
        self.assertEqual(item, all_programs[item_key])
def setUp(self):
    """Run the parent setUp, then cache ``self.program`` under its program key."""
    super(EnrollmentTestMixin, self).setUp()  # lint-amnesty, pylint: disable=super-with-arguments
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=self.program_uuid)
    # timeout=None: presumably "never expire" (Django cache semantics) - confirm.
    cache.set(program_cache_key, self.program, None)