def _check_feed_client_credentials():
    from anchore_engine.services.policy_engine.engine.feeds.client import get_client

    sleep_time = feed_config_check_backoff
    last_ex = None

    for i in range(feed_config_check_retries):
        if i > 0:
            logger.info("Waiting for {} seconds to try feeds client config check again".format(sleep_time))
            time.sleep(sleep_time)
            sleep_time += feed_config_check_backoff

        try:
            logger.info('Checking feeds client credentials. Attempt {} of {}'.format(i + 1, feed_config_check_retries))
            # Constructing the client exercises the configured endpoint and credentials;
            # the instance itself is not needed afterwards, so it is discarded.
            client = get_client()
            client = None
            logger.info('Feeds client credentials ok')
            return True
        except Exception as e:
            logger.warn("Could not verify feeds endpoint and/or config. Got exception: {}".format(e))
            last_ex = e
    else:
        # The for-else branch only runs when every retry was exhausted without returning.
        if last_ex:
            raise last_ex
        else:
            raise Exception('Exceeded retries for feeds client config check. Failing check')
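# A minimal sketch of the module-level retry settings _check_feed_client_credentials() relies on,
# plus an assumed bootstrap-time call site. The constant names come from the function above; the
# values and the call pattern below are illustrative assumptions, not taken from this module.
#
# feed_config_check_retries = 3    # assumed: number of attempts before giving up
# feed_config_check_backoff = 5    # assumed: seconds added to the wait between attempts
#
# try:
#     _check_feed_client_credentials()
# except Exception as err:
#     logger.error('Feeds client config check failed, cannot start service: {}'.format(err))
#     raise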
def test_feed_sync():
    test_client = get_client(feeds_url=feed_url,
                             user=('*****@*****.**', 'pbiU2RYZ2XrmYQ'),
                             conn_timeout=10,
                             read_timeout=30)

    for f in test_client.list_feeds().feeds:
        try:
            test_client.list_feed_groups(f.name)
        except Exception as e:
            logger.info('Caught: {} for feed: {}'.format(e, f))

    next_token = False
    since_time = None
    feed = 'vulnerabilities'
    group = 'alpine:3.6'
    last_token = None

    while next_token is not None:
        logger.info('Getting a page of data')

        if next_token:
            last_token = next_token
            logger.info('Using token: {}'.format(next_token))
            data = test_client.get_feed_group_data(feed, group, since=since_time, next_token=next_token)
        else:
            last_token = None
            data = test_client.get_feed_group_data(feed, group, since=since_time)

        next_token = data.next_token
        logger.info('Got {} items and new next token: {}'.format(data.record_count, next_token))

        if next_token:
            assert next_token != last_token
        assert len(data.data) > 0
def test_anon_user():
    test_client = get_client(feeds_url=feed_url,
                             user=('*****@*****.**', 'pbiU2RYZ2XrmYQ'),
                             conn_timeout=10,
                             read_timeout=30)

    for f in test_client.list_feeds().feeds:
        try:
            test_client.list_feed_groups(f.name)
        except Exception as e:
            logger.error('Caught: {} for feed: {}'.format(e, f))

    test_client.get_feed_group_data('vulnerabilities', 'alpine:3.6', since=datetime.datetime.utcnow())
def sync(to_sync=None, full_flush=False, catalog_client=None, feed_client=None, operation_id=None):
    """
    Sync all feeds.

    :return: list of per-feed sync result dicts
    """
    result = []

    if not feed_client:
        feed_client = get_client()

    logger.info('Performing sync of feeds: {} (operation_id={})'.format('all' if to_sync is None else to_sync, operation_id))

    updated, failed = DataFeeds.sync_metadata(feed_client=feed_client, to_sync=to_sync, operation_id=operation_id)
    updated_names = set(updated.keys())

    # Feeds configured to sync but that were not on the upstream source at all
    if to_sync:
        for feed_name in set(to_sync).difference(updated_names):
            failed.append((feed_name, 'Feed not found on upstream source'))

    # Build the list of feed instances to execute the syncs on
    feeds_to_sync = []
    for feed_name in updated_names:
        try:
            feeds_to_sync.append(feed_instance_by_name(feed_name))
        except KeyError as e:
            logger.error('Could not initialize metadata for feed {}. Error: No feed implementation found for feed {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            failed.append((feed_name, e))
        except Exception as e:
            logger.error('Could not initialize metadata for feed {}. Error: {}. (operation_id={})'.format(feed_name, str(e), operation_id))
            logger.warn('Cannot sync metadata for feed {} from upstream source. Skipping. (operation_id={})'.format(feed_name, operation_id))
            failed.append((feed_name, e))

    # Process the feeds that failed for any reason pre-data-download
    result.extend(DataFeeds._process_failed_feeds(failed_tuples=failed, catalog_client=catalog_client, operation_id=operation_id))

    # Sort the feed instances for the syncing process to ensure highest priority feeds sync first (e.g. vulnerabilities before package metadata)
    feeds_to_sync = _ordered_feeds(feeds_to_sync)

    # Do the fetches
    groups_to_download = []
    for f in feeds_to_sync:
        logger.info('Initialized feed to sync: {} (operation_id={})'.format(f.__feed_name__, operation_id))
        if f.metadata:
            if f.metadata.enabled:
                for g in f.metadata.groups:
                    if g.enabled:
                        groups_to_download.append(g)
                    else:
                        logger.info("Will not sync/download group {} of feed {} because group is explicitly disabled".format(g.name, g.feed_name))
            else:
                logger.info('Skipping feed {} because it is explicitly not enabled'.format(f.__feed_name__))
        else:
            logger.warn('No metadata found for feed {}. Unexpected but not an error (operation_id={})'.format(f.__feed_name__, operation_id))

    logger.debug('Groups to download {}'.format(groups_to_download))

    if not feed_client:
        feed_client = get_client()

    base_dir = DataFeeds.__scratch_dir__ if DataFeeds.__scratch_dir__ else localconfig.get_config().get('tmp_dir')
    download_dir = os.path.join(base_dir, 'policy_engine_tmp', 'feed_syncs')
    feed_data_repo = None

    try:
        # Order by feed
        for f in feeds_to_sync:
            feed_result = build_feed_sync_results(feed=f.__feed_name__, status='failure')
            feed_result['status'] = 'success'

            try:
                # Feed level notification and log msg
                notify_event(FeedSyncStarted(feed=f.__feed_name__), catalog_client, operation_id=operation_id)

                # Filter groups by that feed
                groups_to_sync = [x for x in groups_to_download if x.feed_name == f.__feed_name__]
                logger.debug('Groups to sync {}'.format(groups_to_sync))

                for g in groups_to_sync:
                    # Download just one group into a download result
                    group_download_config = DownloadOperationConfiguration.generate_new(feed_client.feed_url, db_groups_to_sync=[g])
                    downloader = FeedDownloader(download_root_dir=download_dir, config=group_download_config, client=feed_client, fetch_all=full_flush)
                    logger.debug('Groups to download {}'.format(downloader.config.groups))

                    try:
                        notify_event(FeedGroupSyncStarted(feed=g.feed_name, group=g.name), catalog_client, operation_id=operation_id)

                        logger.info('Beginning feed data fetch (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        feed_data_repo = downloader.execute(feed_name=g.feed_name, group_name=g.name)

                        logger.info('Download complete. Syncing to db (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))
                        f_result = DataFeeds.sync_from_fetched(feed_data_repo, catalog_client=catalog_client, operation_id=operation_id, full_flush=full_flush)

                        # Extract the single group record...
                        group_result = _get_group_result(f_result)

                        logger.info('DB Sync complete (feed={}, group={}, operation_id={})'.format(g.feed_name, g.name, operation_id))

                        if group_result['status'] == 'success':
                            notify_event(FeedGroupSyncCompleted(feed=f.__feed_name__, group=g.name, result=group_result), catalog_client, operation_id=operation_id)
                        else:
                            # If any group fails, the whole feed is marked as failed
                            feed_result['status'] = 'failure'
                            notify_event(FeedGroupSyncFailed(feed=f.__feed_name__, group=g.name, error='Failed to sync to db'), catalog_client, operation_id=operation_id)

                        feed_result['groups'].append(group_result)
                    except Exception as e:
                        logger.error('Error syncing {}/{} (operation_id={})'.format(g.feed_name, g.name, operation_id))
                        notify_event(FeedGroupSyncFailed(feed=g.feed_name, group=g.name, error=e), catalog_client, operation_id)
                        feed_result['status'] = 'failure'
                    finally:
                        try:
                            feed_data_repo.teardown()
                        except Exception:
                            logger.exception('Could not cleanup download repo due to error')

                        feed_data_repo = None

            except Exception as e:
                logger.error('Error syncing {} (operation_id={})'.format(f, operation_id))

            if feed_result['status'] == 'success':
                notify_event(FeedSyncCompleted(feed=f.__feed_name__), catalog_client, operation_id)
            else:
                notify_event(FeedSyncFailed(feed=f.__feed_name__, error='One or more groups failed to sync'), catalog_client, operation_id)

            result.append(feed_result)
    finally:
        if feed_data_repo:
            feed_data_repo.teardown()

    return result
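# A minimal usage sketch of sync() under stated assumptions: this is not how the running service
# drives feed syncs; the helper name, the 'catalog_client' argument, and the choice of the
# 'vulnerabilities' feed are illustrative only.
def _example_sync_call(catalog_client=None):
    import uuid as _uuid

    sync_results = sync(to_sync=['vulnerabilities'],    # limit the sync to one configured feed
                        full_flush=False,               # incremental sync, no flush of existing data
                        catalog_client=catalog_client,  # assumed: only needed for event notifications
                        operation_id=_uuid.uuid4().hex) # correlation id echoed in log messages/events

    # Each entry is a per-feed result dict; 'status' is set to 'success' or 'failure' by sync().
    return [r.get('status') for r in sync_results]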
def test_feed_downloader():
    """
    Requires network access to the public feed service ancho.re

    :return:
    """
    groups_to_sync = [
        GroupDownloadOperationConfiguration(feed='vulnerabilities',
                                            group='alpine:3.7',
                                            parameters=GroupDownloadOperationParams(since=None)),
        GroupDownloadOperationConfiguration(feed='vulnerabilities',
                                            group='alpine:3.8',
                                            parameters=GroupDownloadOperationParams(since=None)),
        # GroupDownloadOperationConfiguration(feed='nvdv2', group='nvdv2:cves', parameters=GroupDownloadOperationParams(since=None))
    ]
    dl_conf = DownloadOperationConfiguration(groups=groups_to_sync, uuid=uuid.uuid4().hex, source_uri=ANCHOREIO_URI)

    tmpdir = tempfile.mkdtemp(prefix='anchoretest_repo-')
    data_repo = None
    try:
        client = get_client(ANCHOREIO_URI, user=('something', 'something'), conn_timeout=1, read_timeout=30)
        fetcher = FeedDownloader(download_root_dir=tmpdir, config=dl_conf, client=client, fetch_all=False)

        with timer('feed download', log_level='info'):
            data_repo = fetcher.execute()

        assert data_repo is not None
        assert data_repo.root_dir.startswith(tmpdir)
        assert data_repo.metadata.data_write_dir.startswith(tmpdir)
        assert os.path.isdir(data_repo.metadata.data_write_dir)
        assert os.path.isdir(data_repo.root_dir)
        assert len(os.listdir(tmpdir)) > 0

        count = 0
        with timer('alpine 3.8 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.8', 0):
                count += 1

        assert count == sum([x.total_records for x in data_repo.metadata.download_result.results
                             if x.feed == 'vulnerabilities' and x.group == 'alpine:3.8'])

        with timer('alpine 3.7 iterate', log_level='info'):
            for _ in data_repo.read('vulnerabilities', 'alpine:3.7', 0):
                count += 1

        assert count == sum([x.total_records for x in data_repo.metadata.download_result.results])
    finally:
        logger.info('Cleaning up temp dir')
        if data_repo:
            data_repo.teardown()