def refresh_groups(self):
    """Create local FeedGroupMetadata records for any upstream groups not yet known.

    Fetches the group list for this feed from the upstream source and, for each
    group with no matching local record, builds a FeedGroupMetadata attached to
    this feed's metadata with last_sync cleared.
    """
    upstream_groups = self.source.list_feed_groups(self.__feed_name__)
    for upstream_group in upstream_groups:
        if self.group_by_name(upstream_group.name):
            # Already tracked locally; nothing to do for this group.
            continue
        new_group = FeedGroupMetadata(
            name=upstream_group.name,
            description=upstream_group.description,
            access_tier=upstream_group.access_tier,
            feed=self.metadata,
        )
        new_group.last_sync = None
def _sync_feed_group_metadata(
    db: Session,
    feed_api_record: Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]],
    db_feeds: Dict[str, FeedMetadata],
    operation_id: Optional[str] = None,
) -> None:
    """
    Add FeedGroupMetadata records to DB if they don't already exist, and refresh
    mutable fields on feed and group records that do exist.

    :param db: database session
    :type db: Session
    :param feed_api_record: data from API client
    :type feed_api_record: Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]
    :param db_feeds: map of feed names to FeedMetadata tied to DB session
    :type db_feeds: Dict[str, FeedMetadata]
    :param operation_id: UUID4 hexadecimal string
    :type operation_id: Optional[str]
    """
    api_feed = feed_api_record["meta"]
    # NOTE(review): assumes the feed is present in db_feeds (caller is expected
    # to have created it); a missing entry would raise AttributeError here.
    db_feed = db_feeds.get(api_feed.name)

    # Refresh the mutable feed-level fields from the upstream record.
    db_feed.description = api_feed.description
    db_feed.access_tier = api_feed.access_tier

    existing_groups = {group.name: group for group in db_feed.groups}
    for api_group in feed_api_record.get("groups", []):
        known_group = existing_groups.get(api_group.name)
        # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
        if known_group is None:
            logger.debug(
                "Adding new feed metadata record to db: {} (operation_id={})".format(
                    api_group.name, operation_id
                )
            )
            new_group = FeedGroupMetadata(
                name=api_group.name,
                description=api_group.description,
                access_tier=api_group.access_tier,
                feed=db_feed,
                enabled=True,
            )
            new_group.last_sync = None
            db.add(new_group)
        else:
            logger.debug(
                "Feed group metadata already in db: {} (operation_id={})".format(
                    api_group.name, operation_id
                )
            )
            known_group.access_tier = api_group.access_tier
            known_group.description = api_group.description
def mock_feed_metadata(anchore_db):
    """
    Fixture for delivering mock feed and feed group metadata for metadata ops

    :param anchore_db: db fixture dependency; ensures the database is initialized
    :return: list of the feed names that were inserted
    """
    feed_names = []
    with session_scope() as db:
        for f in mock_feeds:
            feed_names.append(f['name'])
            feed = FeedMetadata()
            feed.name = f['name']
            feed.description = f['description']
            feed.enabled = True
            feed.access_tier = 0
            feed.groups = []
            for grp in f['groups']:
                g = FeedGroupMetadata()
                g.name = grp['name']
                g.access_tier = 0
                g.description = ''
                g.enabled = True
                g.feed_name = feed.name
                # Bug fix: groups were constructed but never attached to the
                # feed, leaving feed.groups empty for every mock feed.
                feed.groups.append(g)
            # Bug fix: the feed was never added to the session, so the fixture
            # persisted nothing and consumers saw an empty metadata table.
            db.add(feed)
    return feed_names
def test_get_groups_to_download_legacy(self):
    """Legacy provider returns the groups already attached to the feed metadata, unchanged."""
    expected_groups = [
        FeedGroupMetadata(name="vulnerabilities:alpine:3.10", enabled=True),
        FeedGroupMetadata(name="vulnerabilities:alpine:3.11", enabled=True),
    ]
    vuln_feed = VulnerabilityFeed(
        metadata=FeedMetadata(
            name="vulnerabilities",
            enabled=True,
            groups=expected_groups,
        )
    )
    provider = LegacySyncUtilProvider(
        {"vulnerabilities": SyncConfig(enabled=True, url="www.anchore.com")}
    )
    result = provider.get_groups_to_download({}, [vuln_feed], "0")
    assert result == expected_groups
def get_groups_to_download(
    source_feeds: Dict[
        str, Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]
    ],
    feeds_to_sync: List[DataFeed],
    operation_id: str,
) -> List[FeedGroupMetadata]:
    """
    Creates a FeedGroupMetadata record that is never added to the database.
    We purposefully avoid adding the feed attribute to the record so that this record
    does not get created implicitly by sqlalchemy back-population.
    Uses FeedMetadata from feeds_to_sync. Expects only one record is present for grypedb.

    :param source_feeds: mapping containing FeedAPIRecord and FeedAPIGroupRecord
    :type source_feeds: Dict[str, Dict[str, Union[FeedAPIRecord, List[FeedAPIGroupRecord]]]]
    :param feeds_to_sync: ordered list of DataFeed(s) to sync
    :type feeds_to_sync: List[DataFeed]
    :param operation_id: UUID4 hexadecimal string
    :type operation_id: Optional[str]
    :return: list with at most one transient FeedGroupMetadata record
    """
    # TODO consider throwing exceptions if length is not 1 for these
    api_feed_group = source_feeds[GRYPE_DB_FEED_NAME]["groups"][0]
    feed_metadata = feeds_to_sync[0].metadata

    # Guard clause: explicitly disabled feeds are never downloaded.
    if not feed_metadata.enabled:
        logger.info(
            "Will not sync/download feed %s because feed is explicitly disabled",
            feed_metadata.name,
        )
        return []

    return [
        FeedGroupMetadata(
            name=api_feed_group.name,
            feed_name=feed_metadata.name,
            description=api_feed_group.description,
            access_tier=api_feed_group.access_tier,
            enabled=True,
        )
    ]
def sync_metadata(feed_client: IFeedSource, to_sync: list = None, operation_id=None) -> tuple:
    """
    Get metadata from source and sync db metadata records to that (e.g. add any new groups or feeds)

    Executes as a unit-of-work for db, so will commit result and returns the records found on
    upstream source. If a record exists in db but was not found upstream, it is not returned.

    :param feed_client: client used to list feeds and feed groups from the upstream source
    :param to_sync: list of string feed names to sync metadata on
    :param operation_id: UUID4 hexadecimal string used to correlate log lines
    :return: tuple, first element: dict of names mapped to db records post-sync only including
             records successfully updated by upstream, second element is a list of tuples where
             each tuple is (failed_feed_name, error_obj)
    """
    if not to_sync:
        return {}, []

    db = get_session()
    try:
        logger.info(
            'Syncing feed and group metadata from upstream source (operation_id={})'
            .format(operation_id))

        source_resp = feed_client.list_feeds()
        # Fix: the original re-checked `if to_sync` with a dead `else: feeds = []`
        # branch; the early return above guarantees to_sync is truthy here.
        feeds = filter(lambda x: x.name in to_sync, source_resp.feeds)
        failed = []
        source_feeds = {
            x.name: {
                'meta': x,
                'groups': feed_client.list_feed_groups(x.name).groups
            }
            for x in feeds
        }
        logger.debug('Upstream feeds available: %s', source_feeds)

        db_feeds = DataFeeds._pivot_and_filter_feeds_by_config(
            to_sync, list(source_feeds.keys()), get_all_feeds(db))

        for feed_name, feed_api_record in source_feeds.items():
            try:
                logger.info(
                    'Syncing metadata for feed: {} (operation_id={})'
                    .format(feed_name, operation_id))
                api_feed = feed_api_record['meta']
                db_feed = db_feeds.get(api_feed.name)

                # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
                if not db_feed:
                    logger.debug(
                        'Adding new feed metadata record to db: {} (operation_id={})'
                        .format(api_feed.name, operation_id))
                    db_feed = FeedMetadata(
                        name=api_feed.name,
                        description=api_feed.description,
                        access_tier=api_feed.access_tier,
                        enabled=True)
                    db.add(db_feed)
                    db.flush()
                else:
                    logger.debug(
                        'Feed metadata already in db: {} (operation_id={})'
                        .format(api_feed.name, operation_id))

                # Check for any update
                db_feed.description = api_feed.description
                db_feed.access_tier = api_feed.access_tier

                db_groups = {x.name: x for x in db_feed.groups}
                for api_group in feed_api_record.get('groups', []):
                    db_group = db_groups.get(api_group.name)
                    # Do this instead of a db.merge() to ensure no timestamps are reset or overwritten
                    if not db_group:
                        logger.debug(
                            'Adding new feed metadata record to db: {} (operation_id={})'
                            .format(api_group.name, operation_id))
                        db_group = FeedGroupMetadata(
                            name=api_group.name,
                            description=api_group.description,
                            access_tier=api_group.access_tier,
                            feed=db_feed,
                            enabled=True)
                        db_group.last_sync = None
                        db.add(db_group)
                    else:
                        logger.debug(
                            'Feed group metadata already in db: {} (operation_id={})'
                            .format(api_group.name, operation_id))
                        db_group.access_tier = api_group.access_tier
                        db_group.description = api_group.description
            except Exception as e:
                logger.exception('Error syncing feed {}'.format(feed_name))
                # Fix: logger.warn is a deprecated alias of logger.warning.
                logger.warning(
                    'Could not sync metadata for feed: {} (operation_id={})'
                    .format(feed_name, operation_id))
                failed.append((feed_name, e))
            finally:
                db.flush()

        # Reload so the returned map reflects records added/updated above
        db_feeds = DataFeeds._pivot_and_filter_feeds_by_config(
            to_sync, list(source_feeds.keys()), get_all_feeds(db))

        db.commit()
        logger.info(
            'Metadata sync from feeds upstream source complete (operation_id={})'
            .format(operation_id))
        return db_feeds, failed
    except Exception as e:
        logger.error(
            'Rolling back feed metadata update due to error: {} (operation_id={})'
            .format(e, operation_id))
        db.rollback()
        raise
mock_feeds = [ FeedMetadata(name='vulnerabilities', description='Test version of vulnerabilities feed', access_tier=0, enabled=True), FeedMetadata(name='github', description='Test version of github feed', access_tier=0, enabled=True) ] mock_feed_groups = [ FeedGroupMetadata(name='debian:8', feed_name='vulnerabilities', enabled=True, description='Fake debian 8 vuln data', access_tier=0), FeedGroupMetadata(name='debian:9', feed_name='vulnerabilities', enabled=True, description='Fake debian 9 vuln data', access_tier=0), FeedGroupMetadata(name='github:pip', feed_name='github', enabled=True, description='Github python/pip data', access_tier=0) ] mock_vulnerabilities = [
class TestSyncUtilProvider:
    # Tests covering the two SyncUtilProvider implementations: the legacy feed
    # sync path and the grype-db feed sync path.

    @pytest.mark.parametrize(
        "sync_util_provider, sync_configs, expected_to_sync_after_filtering",
        [
            (
                LegacySyncUtilProvider,
                {
                    "packages": SyncConfig(url="www.anchore.com", enabled=True)
                },
                ["packages"],
            ),
            (
                LegacySyncUtilProvider,
                {
                    "nvdv2": SyncConfig(url="www.anchore.com", enabled=True),
                    "vulnerabilities": SyncConfig(url="www.anchore.com", enabled=True),
                },
                ["nvdv2", "vulnerabilities"],
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True)
                },
                ["grypedb"],
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True),
                    "packages": SyncConfig(url="www.anchore.com", enabled=True),
                },
                ["grypedb"],
            ),
        ],
    )
    def test_get_filtered_sync_configs(
        self,
        sync_util_provider: Type[SyncUtilProvider],
        sync_configs: Dict[str, SyncConfig],
        expected_to_sync_after_filtering: List[str],
    ):
        """
        This is a bit confusing and probably should be changed, which is why i've
        written a test for it.
        There are two SyncUtilProviders. The LegacySyncUtilProvider works for all
        feeds that follow the legacy format. The GrypeDBSyncUtilProvider works for
        the GrypeDB feed format. However, the VulnerabilitiesProvider has two
        implementations.
        The LegacyProvider contains all vulnerability logic that changes when the
        provider is set to "legacy".
        The GrypeProvider contains all vulnerability logic that changes when the
        provider is set to "grype".
        As such, the GrypeProvider actually returns both "packages" and "grypedb"
        SyncConfigs, while "packages" is actually a Legacy style feed. Meanwhile,
        the "packages" feed can only be synced by the LegacySyncUtilProvider.
        The solution is likely to wrap the entire sync method with the
        SyncUtilProvider, that way LegacySyncUtilProvider can just do legacy feeds,
        while GrypeDBSyncUtilProvider will first do "grypedb" feed with the grype
        logic and then do "packages" feed with the legacy logic.
        """
        filtered_configs = sync_util_provider._get_filtered_sync_configs(
            sync_configs)
        # Filtering order is not part of the contract; compare as sets.
        assert set(filtered_configs) == set(expected_to_sync_after_filtering)

    @pytest.mark.parametrize(
        "sync_util_provider, sync_configs, expected_client_class",
        [
            (
                LegacySyncUtilProvider,
                {
                    "vulnerabilities": SyncConfig(url="www.anchore.com",
                                                  enabled=True)
                },
                FeedServiceClient,
            ),
            (
                GrypeDBSyncUtilProvider,
                {
                    "grypedb": SyncConfig(url="www.anchore.com", enabled=True)
                },
                GrypeDBServiceClient,
            ),
        ],
    )
    def test_get_client(
        self,
        sync_util_provider: Type[SyncUtilProvider],
        sync_configs: Dict[str, SyncConfig],
        expected_client_class: Type[IFeedSource],
    ):
        # Each provider should build the client type matching its feed format.
        client = sync_util_provider(sync_configs).get_client()
        assert isinstance(client, expected_client_class)

    @pytest.mark.parametrize(
        "metadata, expected_number_groups, expected_feed_group_metadata",
        [
            # Enabled feed: exactly one transient group record is produced.
            (
                FeedMetadata(name="grypedb", enabled=True),
                1,
                FeedGroupMetadata(name="grypedb:vulnerabilities",
                                  feed_name="grypedb",
                                  enabled=True),
            ),
            # Disabled feed: nothing to download.
            (FeedMetadata(name="grypedb", enabled=False), 0, None),
        ],
    )
    def test_get_groups_to_download_grype(
        self,
        metadata: FeedMetadata,
        expected_number_groups: int,
        expected_feed_group_metadata: FeedMetadata,
    ):
        source_feeds = {
            "grypedb": {
                "meta": FeedList(feeds=[
                    FeedAPIRecord(
                        name="grypedb",
                        description="grypedb feed",
                        access_tier="0",
                    )
                ]),
                "groups": [
                    FeedAPIGroupRecord(
                        name="grypedb:vulnerabilities",
                        description="grypedb:vulnerabilities group",
                        access_tier="0",
                        grype_listing=GrypeDBListing(
                            built=anchore_now_datetime(),
                            version="2",
                            url="www.anchore.com",
                            checksum="sha256:xxx",
                        ),
                    )
                ],
            }
        }
        feeds_to_sync = [GrypeDBFeed(metadata=metadata)]
        sync_config = {
            "grypedb": SyncConfig(enabled=True, url="www.anchore.com")
        }
        groups_to_download = GrypeDBSyncUtilProvider(
            sync_config).get_groups_to_download(source_feeds, feeds_to_sync,
                                                "0")
        assert len(groups_to_download) == expected_number_groups
        if expected_number_groups > 0:
            group = groups_to_download[0]
            assert group.enabled == expected_feed_group_metadata.enabled
            assert group.feed_name == expected_feed_group_metadata.feed_name
            assert group.name == expected_feed_group_metadata.name

    def test_get_groups_to_download_legacy(self):
        # Legacy provider should pass through the groups already attached to
        # the feed metadata, unchanged.
        feed_group_metadata = [
            FeedGroupMetadata(name="vulnerabilities:alpine:3.10",
                              enabled=True),
            FeedGroupMetadata(name="vulnerabilities:alpine:3.11",
                              enabled=True),
        ]
        feeds_to_sync = [
            VulnerabilityFeed(metadata=FeedMetadata(
                name="vulnerabilities",
                enabled=True,
                groups=feed_group_metadata,
            ))
        ]
        sync_config = {
            "vulnerabilities": SyncConfig(enabled=True, url="www.anchore.com")
        }
        groups_to_download = LegacySyncUtilProvider(
            sync_config).get_groups_to_download({}, feeds_to_sync, "0")
        assert groups_to_download == feed_group_metadata
access_tier=0, enabled=True, ), FeedMetadata( name="github", description="Test version of github feed", access_tier=0, enabled=True, ), ] mock_feed_groups = [ FeedGroupMetadata( name="debian:8", feed_name="vulnerabilities", enabled=True, description="Fake debian 8 vuln data", access_tier=0, ), FeedGroupMetadata( name="debian:9", feed_name="vulnerabilities", enabled=True, description="Fake debian 9 vuln data", access_tier=0, ), FeedGroupMetadata( name="github:pip", feed_name="github", enabled=True, description="Github python/pip data",