def test_get_many(self, mock_warning, mock_info):
    """
    Walk get_pathways through three cache states for a site: no ids cached,
    ids cached with details for only 2 of 3 pathways, and everything cached.
    """
    def cache_key(entry):
        return PATHWAY_CACHE_KEY_TPL.format(id=entry['id'])

    created = PathwayFactory.create_batch(3)

    # Only the first two pathways have their details cached up front.
    partial_pathways = {cache_key(entry): entry for entry in created[:2]}
    cache.set_many(partial_pathways, None)

    # The site's id list is not cached yet, so an empty list and a warning
    # are expected.
    assert get_pathways(self.site) == []
    mock_warning.assert_called_once_with(
        'Failed to get credit pathway ids from the cache.'
    )
    mock_warning.reset_mock()

    # Cache the ids of all 3 pathways for the site.
    site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
    cache.set(site_ids_key, [entry['id'] for entry in created], None)

    actual_pathways = get_pathways(self.site)

    # Only the 2 pathways with cached details come back; the third one is
    # reported through an info message (retry) and a warning (still missing).
    returned_ids = {entry['id'] for entry in actual_pathways}
    expected_ids = {entry['id'] for entry in partial_pathways.values()}
    assert returned_ids == expected_ids
    mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')
    mock_warning.assert_called_with(
        'Failed to get details for credit pathway {id} from the cache.'.format(id=created[2]['id'])
    )
    mock_warning.reset_mock()

    # Dictionaries are not hashable, so the payloads are compared one by one.
    # The id comparison above already established which pathways came back.
    for returned in actual_pathways:
        assert returned == partial_pathways[cache_key(returned)]

    # Now cache the details of every pathway.
    all_pathways = {cache_key(entry): entry for entry in created}
    cache.set_many(all_pathways, None)

    actual_pathways = get_pathways(self.site)

    # All 3 pathways are expected, with no warning logged.
    returned_ids = {entry['id'] for entry in actual_pathways}
    expected_ids = {entry['id'] for entry in all_pathways.values()}
    assert returned_ids == expected_ids
    assert not mock_warning.called

    for returned in actual_pathways:
        assert returned == all_pathways[cache_key(returned)]
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.

    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways (empty when nothing is cached).
        dict, if a specific pathway is requested; whatever the cache returned
        (falsy) when that pathway is not cached.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(
        SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    cached = cache.get_many([
        PATHWAY_CACHE_KEY_TPL.format(id=cached_id)
        for cached_id in pathway_ids
    ])
    # Materialize a real list: on Python 3 dict.values() is a view that cannot
    # be extended with the retried results and never compares equal to [].
    pathways = list(cached.values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(
                count=len(missing_ids)))

        retried_pathways = cache.get_many([
            PATHWAY_CACHE_KEY_TPL.format(id=missing_id)
            for missing_id in missing_ids
        ])
        pathways.extend(retried_pathways.values())

        # Anything absent after the single retry is only reported, not raised.
        still_missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
def test_get_many_with_missing(self, mock_cache, mock_warning, mock_info):
    """
    Simulate a first get_many that omits one pathway and a retry that returns
    it; get_pathways is expected to combine both results.
    """
    created = PathwayFactory.create_batch(3)

    partial_pathways = {
        PATHWAY_CACHE_KEY_TPL.format(id=entry['id']): entry
        for entry in created[:2]
    }
    all_pathways = {
        PATHWAY_CACHE_KEY_TPL.format(id=entry['id']): entry
        for entry in created
    }

    def fake_get_many(keys):
        # A single-key request is treated as the retry for the last pathway;
        # any other request gets only the first two pathways.
        if len(keys) != 1:
            return partial_pathways
        last = created[-1]
        return {PATHWAY_CACHE_KEY_TPL.format(id=last['id']): last}

    mock_cache.get.return_value = [entry['id'] for entry in created]
    mock_cache.get_many.side_effect = fake_get_many

    actual_pathways = get_pathways(self.site)

    # All 3 pathways are expected back. The initially missing one should only
    # produce an info message, because the retry supplies its details.
    returned_ids = set(entry['id'] for entry in actual_pathways)
    expected_ids = set(entry['id'] for entry in all_pathways.values())
    self.assertEqual(returned_ids, expected_ids)
    self.assertFalse(mock_warning.called)
    mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')

    for returned in actual_pathways:
        expected = all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=returned['id'])]
        self.assertEqual(returned, expected)
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.

    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways (empty when nothing is cached).
        dict, if a specific pathway is requested; whatever the cache returned
        (falsy) when that pathway is not cached.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    cached = cache.get_many([PATHWAY_CACHE_KEY_TPL.format(id=cached_id) for cached_id in pathway_ids])
    # dict.values() is a view on Python 3: it cannot be extended with the
    # retried results and never compares equal to [], so build a list here.
    pathways = list(cached.values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(count=len(missing_ids))
        )

        retried_pathways = cache.get_many(
            [PATHWAY_CACHE_KEY_TPL.format(id=missing_id) for missing_id in missing_ids]
        )
        pathways.extend(retried_pathways.values())

        # Ids that are still absent after the one retry are logged and skipped.
        still_missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
def process_pathways(self, site, pathways, programs):
    """
    For each program, add references to each pathway it is a part of.
    For each pathway, replace the "programs" dict with "program_uuids",
    which only contains uuids (since program data is already cached)

    Arguments:
        site: object whose ``domain`` is used in the failure log message.
        pathways: iterable of pathway dicts; each is mutated in place.
        programs: dict keyed by program cache key; each referenced entry's
            'pathway_ids' list is appended to in place.

    Returns:
        tuple: (processed_pathways, programs, failure) where
        processed_pathways maps pathway cache keys to pathway dicts and
        failure is True if any pathway raised while being processed.
    """
    processed_pathways = {}
    failure = False
    for pathway in pathways:
        try:
            pathway_id = pathway['id']
            pathway_cache_key = PATHWAY_CACHE_KEY_TPL.format(id=pathway_id)
            # Registered before its programs are walked, so a pathway that
            # fails part-way through still appears in the result.
            processed_pathways[pathway_cache_key] = pathway
            uuids = []

            for program in pathway['programs']:
                program_uuid = program['uuid']
                program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)
                programs[program_cache_key]['pathway_ids'].append(pathway_id)
                uuids.append(program_uuid)

            del pathway['programs']
            pathway['program_uuids'] = uuids
        except Exception:  # pylint: disable=broad-except
            # Best effort: record the failure (with traceback) and carry on
            # with the remaining pathways. KeyboardInterrupt/SystemExit are
            # deliberately no longer swallowed here.
            logger.exception('Failed to process pathways for {domain}'.format(domain=site.domain))
            failure = True
    return processed_pathways, programs, failure
def process_pathways(self, site, pathways, programs):
    """
    For each program, add references to each pathway it is a part of.
    For each pathway, replace the "programs" dict with "program_uuids",
    which only contains uuids (since program data is already cached)

    Arguments:
        site: object whose ``domain`` is used in the failure log message.
        pathways: iterable of pathway dicts; each is mutated in place.
        programs: dict keyed by program cache key; each referenced entry's
            'pathway_ids' list is appended to in place.

    Returns:
        tuple: (processed_pathways, programs, failure) where
        processed_pathways maps pathway cache keys to pathway dicts and
        failure is True if any pathway raised while being processed.
    """
    processed_pathways = {}
    failure = False
    for pathway in pathways:
        try:
            pathway_id = pathway['id']
            pathway_cache_key = PATHWAY_CACHE_KEY_TPL.format(id=pathway_id)
            # Registered before its programs are walked, so a pathway that
            # fails part-way through still appears in the result.
            processed_pathways[pathway_cache_key] = pathway
            uuids = []

            for program in pathway['programs']:
                program_uuid = program['uuid']
                program_cache_key = PROGRAM_CACHE_KEY_TPL.format(
                    uuid=program_uuid)
                programs[program_cache_key]['pathway_ids'].append(
                    pathway_id)
                uuids.append(program_uuid)

            del pathway['programs']
            pathway['program_uuids'] = uuids
        except Exception:  # pylint: disable=broad-except
            # Best effort per pathway; unlike a bare except this lets
            # KeyboardInterrupt/SystemExit propagate.
            logger.exception(
                f'Failed to process pathways for {site.domain}')
            failure = True
    return processed_pathways, programs, failure
def fake_get_many(keys):
    """
    Return only the last pathway for a single-key request, and the partial
    pathway mapping for any other request.
    """
    if len(keys) != 1:
        return partial_pathways

    last_pathway = pathways[-1]
    return {
        PATHWAY_CACHE_KEY_TPL.format(id=last_pathway['id']): last_pathway
    }
def test_pathways_multiple_pages(self):
    """
    Verify that the command properly caches credit pathways when multiple pages
    are returned from its endpoint
    """
    UserFactory(username=self.catalog_integration.service_username)

    # 40 extra pathways without programs, on top of the fixture pathways.
    extra_pathways = PathwayFactory.create_batch(40)
    for extra in extra_pathways:
        extra['programs'] = []

    pathways = self.pathways + extra_pathways

    programs = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=entry['uuid']): entry
        for entry in self.programs
    }

    self.mock_list()
    for uuid in self.uuids:
        self.mock_detail(uuid, programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    # mock 3 pages of credit pathways, starting at the last
    pages = (
        (pathways[40:], 3, True),
        (pathways[20:40], 2, False),
        (pathways[:20], 1, False),
    )
    for page, number, is_final in pages:
        self.mock_pathways(page, page_number=number, final=is_final)

    call_command('cache_programs')

    pathways_dict = {
        PATHWAY_CACHE_KEY_TPL.format(id=entry['id']): entry
        for entry in pathways
    }
    pathway_keys = list(pathways_dict)

    site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    cached_pathway_keys = cache.get(site_ids_key)
    self.assertEqual(set(cached_pathway_keys), set(pathway_keys))

    cached_pathways = cache.get_many(pathway_keys)
    self.assertEqual(set(cached_pathways), set(pathways_dict))

    # Dictionaries are not hashable, so the payloads are compared one at a
    # time; the key comparisons above already cover which pathways were cached.
    for key, cached in cached_pathways.items():
        # cached pathways store just program uuids instead of the full programs,
        # so the expected value is transformed the same way before comparing
        expected = pathways_dict[key]
        expected['program_uuids'] = [entry['uuid'] for entry in expected['programs']]
        del expected['programs']

        self.assertEqual(cached, expected)
def test_get_one(self, mock_warning, _mock_info):
    """
    A single-pathway lookup warns and yields None before the pathway is
    cached, and yields the cached pathway without warning afterwards.
    """
    expected_pathway = PathwayFactory()
    expected_id = expected_pathway['id']

    # Cache miss.
    uncached_result = get_pathways(self.site, pathway_id=expected_id)
    self.assertEqual(uncached_result, None)
    expected_warning = u'Failed to get details for credit pathway {id} from the cache.'.format(
        id=expected_id)
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Cache hit.
    pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
    cache.set(pathway_key, expected_pathway, None)

    cached_result = get_pathways(self.site, pathway_id=expected_id)
    self.assertEqual(cached_result, expected_pathway)
    self.assertFalse(mock_warning.called)
def test_get_one(self, mock_warning, _mock_info):
    """
    A single-pathway lookup warns and yields None before the pathway is
    cached, and yields the cached pathway without warning afterwards.
    """
    expected_pathway = PathwayFactory()
    expected_id = expected_pathway['id']

    # Cache miss.
    uncached_result = get_pathways(self.site, pathway_id=expected_id)
    assert uncached_result is None
    expected_warning = f'Failed to get details for credit pathway {expected_id} from the cache.'
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Cache hit.
    pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
    cache.set(pathway_key, expected_pathway, None)

    cached_result = get_pathways(self.site, pathway_id=expected_id)
    assert cached_result == expected_pathway
    assert not mock_warning.called
def test_handle_pathways(self):
    """
    Verify that the command requests and caches credit pathways
    """
    UserFactory(username=self.catalog_integration.service_username)

    programs = {
        PROGRAM_CACHE_KEY_TPL.format(uuid=entry['uuid']): entry
        for entry in self.programs
    }
    pathways = {
        PATHWAY_CACHE_KEY_TPL.format(id=entry['id']): entry
        for entry in self.pathways
    }

    self.mock_list()
    self.mock_pathways(self.pathways)
    for uuid in self.uuids:
        self.mock_detail(uuid, programs[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    cached_pathway_keys = cache.get(site_ids_key)
    pathway_keys = list(pathways)
    self.assertEqual(set(cached_pathway_keys), set(pathway_keys))

    cached_pathways = cache.get_many(pathway_keys)
    self.assertEqual(set(cached_pathways), set(pathways))

    # Dictionaries are not hashable, so the payloads are compared one at a
    # time; the key comparisons above already cover which pathways were cached.
    for key, cached in cached_pathways.items():
        # cached pathways store just program uuids instead of the full programs,
        # so the expected value is transformed the same way before comparing
        expected = pathways[key]
        expected['program_uuids'] = [entry['uuid'] for entry in expected['programs']]
        del expected['programs']

        self.assertEqual(cached, expected)
def test_get_one(self, mock_warning, _mock_info):
    """
    A single-pathway lookup warns and yields None before the pathway is
    cached, and yields the cached pathway without warning afterwards.
    """
    expected_pathway = PathwayFactory()
    expected_id = expected_pathway['id']

    # Cache miss.
    uncached_result = get_pathways(self.site, pathway_id=expected_id)
    self.assertEqual(uncached_result, None)
    expected_warning = 'Failed to get details for credit pathway {id} from the cache.'.format(
        id=expected_id)
    mock_warning.assert_called_once_with(expected_warning)
    mock_warning.reset_mock()

    # Cache hit.
    pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
    cache.set(pathway_key, expected_pathway, None)

    cached_result = get_pathways(self.site, pathway_id=expected_id)
    self.assertEqual(cached_result, expected_pathway)
    self.assertFalse(mock_warning.called)
def fake_get_many(keys):
    """
    Return only the last pathway for a single-key request, and the partial
    pathway mapping for any other request.
    """
    is_single_key_request = len(keys) == 1
    if not is_single_key_request:
        return partial_pathways

    last_key = PATHWAY_CACHE_KEY_TPL.format(id=pathways[-1]['id'])
    return {last_key: pathways[-1]}
def test_get_many(self, mock_warning, mock_info):
    """
    Walk get_pathways through three cache states for a site: no ids cached,
    ids cached with details for only 2 of 3 pathways, and everything cached.
    """
    def cache_key(entry):
        return PATHWAY_CACHE_KEY_TPL.format(id=entry['id'])

    created = PathwayFactory.create_batch(3)

    # Only the first two pathways have their details cached up front.
    partial_pathways = {cache_key(entry): entry for entry in created[:2]}
    cache.set_many(partial_pathways, None)

    # The site's id list is not cached yet, so an empty list and a warning
    # are expected.
    self.assertEqual(get_pathways(self.site), [])
    mock_warning.assert_called_once_with('Failed to get credit pathway ids from the cache.')
    mock_warning.reset_mock()

    # Cache the ids of all 3 pathways for the site.
    site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
    cache.set(site_ids_key, [entry['id'] for entry in created], None)

    actual_pathways = get_pathways(self.site)

    # Only the 2 pathways with cached details come back; the third one is
    # reported through an info message (retry) and a warning (still missing).
    returned_ids = {entry['id'] for entry in actual_pathways}
    expected_ids = {entry['id'] for entry in partial_pathways.values()}
    self.assertEqual(returned_ids, expected_ids)
    mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')
    mock_warning.assert_called_with(
        'Failed to get details for credit pathway {id} from the cache.'.format(id=created[2]['id'])
    )
    mock_warning.reset_mock()

    # Dictionaries are not hashable, so the payloads are compared one by one.
    # The id comparison above already established which pathways came back.
    for returned in actual_pathways:
        self.assertEqual(returned, partial_pathways[cache_key(returned)])

    # Now cache the details of every pathway.
    all_pathways = {cache_key(entry): entry for entry in created}
    cache.set_many(all_pathways, None)

    actual_pathways = get_pathways(self.site)

    # All 3 pathways are expected, with no warning logged.
    returned_ids = {entry['id'] for entry in actual_pathways}
    expected_ids = {entry['id'] for entry in all_pathways.values()}
    self.assertEqual(returned_ids, expected_ids)
    self.assertFalse(mock_warning.called)

    for returned in actual_pathways:
        self.assertEqual(returned, all_pathways[cache_key(returned)])