Example #1
    def _get_manifest_iterator(self, indexer_state, min_id, max_id):
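        # Chain three randomized-batch iterators over the given id range: manifests that
        # were never indexed, manifests whose last index attempt failed, and manifests
        # indexed under a different indexer state.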
        def reindex_threshold():
            return datetime.utcnow() - timedelta(
                seconds=self.app.config.get("SECURITY_SCANNER_V4_REINDEX_THRESHOLD")
            )

        # TODO(alecmerdler): Filter out any `Manifests` that are still being uploaded
        def not_indexed_query():
            return (Manifest.select().join(
                ManifestSecurityStatus,
                JOIN.LEFT_OUTER).where(ManifestSecurityStatus.id >> None))

        def index_error_query():
            return (Manifest.select().join(ManifestSecurityStatus).where(
                ManifestSecurityStatus.index_status == IndexStatus.FAILED,
                ManifestSecurityStatus.last_indexed < reindex_threshold(),
            ))

        def needs_reindexing_query(indexer_hash):
            return (Manifest.select().join(ManifestSecurityStatus).where(
                ManifestSecurityStatus.index_status != IndexStatus.MANIFEST_UNSUPPORTED,
                ManifestSecurityStatus.indexer_hash != indexer_hash,
                ManifestSecurityStatus.last_indexed < reindex_threshold(),
            ))

        # 4^log10(total) gives us a scalable batch size into the billions.
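        # e.g. ~1,000 ids -> 64 per batch, ~1,000,000 -> 4,096, ~1,000,000,000 -> 262,144.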
        batch_size = int(4**log10(max(10, max_id - min_id)))

        # TODO(alecmerdler): We want to index newer manifests first, while backfilling older manifests...
        iterator = itertools.chain(
            yield_random_entries(
                not_indexed_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                index_error_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                lambda: needs_reindexing_query(indexer_state.get("state", "")),
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
        )

        return iterator
Example #2
  def repositories_to_mirror(self, start_token=None):
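    # Returns an iterator over mirror-eligible repositories within the current id range,
    # plus a token that lets the next run resume past max_id.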
    def batch_query():
      return get_eligible_mirrors()

    # Find the minimum ID.
    if start_token is not None:
      min_id = start_token.min_id
    else:
      min_id = get_min_id_for_repo_mirror_config()

    # Get the ID of the last repository mirror config. Will be None if there are none in the database.
    max_id = get_max_id_for_repo_mirror_config()
    if max_id is None:
      return (None, None)

    if min_id is None or min_id > max_id:
      return (None, None)

    # 4^log10(total) gives us a scalable batch size into the billions.
    batch_size = int(4**log10(max(10, max_id - min_id)))

    iterator = yield_random_entries(
      batch_query,
      RepoMirrorConfig.id,
      batch_size,
      max_id,
      min_id)

    return (iterator, RepoMirrorToken(max_id + 1))
Example #3
    def _run_counting(self):
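        # Walk every repository in random batches and count yesterday's actions for any
        # repository that does not have an entry yet.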
        def batch_query():
            return database.Repository.select()

        min_id = model.repository.get_min_id()
        max_id = model.repository.get_max_id()
        if min_id is None or max_id is None:
            return

        # 4^log10(total) gives us a scalable batch size into the billions.
        batch_size = int(4**log10(max(10, max_id - min_id)))

        iterator = yield_random_entries(
            batch_query,
            database.Repository.id,
            batch_size,
            max_id,
            min_id,
        )

        yesterday = date.today() - timedelta(days=1)
        for candidate, abt, num_remaining in iterator:
            if model.repositoryactioncount.has_repository_action_count(
                    candidate, yesterday):
                abt.set()
                continue

            if not self._count_repository_actions(candidate):
                abt.set()
Example #4
    def candidates_to_scan(self, target_version, start_token=None):
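        # Yield images eligible for scanning at `target_version`, plus a token that lets
        # a later call resume past the current max id.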
        def batch_query():
            return get_images_eligible_for_scan(target_version)

        # Find the minimum ID.
        min_id = None
        if start_token is not None:
            min_id = start_token.min_id
        else:
            min_id = app.config.get("SECURITY_SCANNER_INDEXING_MIN_ID")
            if min_id is None:
                min_id = get_min_id_for_sec_scan(target_version)

        # Get the ID of the last image we can analyze. Will be None if there are no images in the
        # database.
        max_id = get_max_id_for_sec_scan()
        if max_id is None:
            return (None, None)

        if min_id is None or min_id > max_id:
            return (None, None)

        # 4^log10(total) gives us a scalable batch size into the billions.
        batch_size = int(4 ** log10(max(10, max_id - min_id)))

        # TODO: Once we have a clean shared NamedTuple for Images, send that to the secscan analyzer
        # rather than the database Image itself.
        iterator = yield_random_entries(
            batch_query, get_image_pk_field(), batch_size, max_id, min_id,
        )

        return (iterator, ScanToken(max_id + 1))
Example #5
    def _backfill_manifests(self):
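        # Backfill `layers_compressed_size` (and `config_media_type`) for manifests that
        # are missing it; returns False once no such manifests remain.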
        try:
            Manifest.select().where(
                Manifest.layers_compressed_size >> None).get()
        except Manifest.DoesNotExist:
            logger.debug("Manifest backfill worker has completed; skipping")
            return False

        iterator = yield_random_entries(
            lambda: Manifest.select().where(Manifest.layers_compressed_size >> None),
            Manifest.id,
            250,
            Manifest.select(fn.Max(Manifest.id)).scalar(),
            1,
        )

        for manifest_row, abt, _ in iterator:
            if manifest_row.layers_compressed_size is not None:
                logger.debug("Another worker preempted this worker")
                abt.set()
                continue

            logger.debug("Setting layers compressed size for manifest %s",
                         manifest_row.id)
            layers_compressed_size = -1
            config_media_type = None
            manifest_bytes = Bytes.for_string_or_unicode(
                manifest_row.manifest_bytes)

            try:
                parsed = parse_manifest_from_bytes(
                    manifest_bytes,
                    manifest_row.media_type.name,
                    validate=False)
                layers_compressed_size = parsed.layers_compressed_size
                if layers_compressed_size is None:
                    layers_compressed_size = 0

                config_media_type = parsed.config_media_type or None
            except ManifestException as me:
                logger.warning(
                    "Got exception when trying to parse manifest %s: %s",
                    manifest_row.id, me)

            assert layers_compressed_size is not None
            updated = (Manifest.update(
                layers_compressed_size=layers_compressed_size,
                config_media_type=config_media_type,
            ).where(Manifest.id == manifest_row.id,
                    Manifest.layers_compressed_size >> None).execute())
            if updated != 1:
                logger.debug("Another worker preempted this worker")
                abt.set()
                continue

        return True
Example #6
    def _candidates_to_backfill(self):
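        # Yield non-hidden RepositoryTag rows that do not yet have a TagToRepositoryTag entry.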
        def missing_tmt_query():
            return (
                self._filter(RepositoryTag.select())
                .join(TagToRepositoryTag, JOIN.LEFT_OUTER)
                .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)
            )

        min_id = self._filter(RepositoryTag.select(fn.Min(RepositoryTag.id))).scalar()
        max_id = self._filter(RepositoryTag.select(fn.Max(RepositoryTag.id))).scalar()

        logger.info("Found candidate range %s-%s", min_id, max_id)

        iterator = yield_random_entries(
            missing_tmt_query,
            RepositoryTag.id,
            1000,
            max_id,
            min_id,
        )

        return iterator
Example #7
def verify_placements():
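    # Walk non-uploading storages in random batches and delete any placement whose blob
    # is missing from its storage location.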
    encountered = set()

    iterator = yield_random_entries(
        lambda: ImageStorage.select().where(ImageStorage.uploading == False),
        ImageStorage.id,
        1000,
        ImageStorage.select(fn.Max(ImageStorage.id)).scalar(),
        1,
    )

    for storage_row, abt, _ in iterator:
        if storage_row.id in encountered:
            continue

        encountered.add(storage_row.id)

        logger.info("Checking placements for storage `%s`", storage_row.uuid)
        try:
            with_locations = model.storage.get_storage_by_uuid(
                storage_row.uuid)
        except model.InvalidImageException:
            logger.exception("Could not find storage `%s`", storage_row.uuid)
            continue

        storage_path = model.storage.get_layer_path(storage_row)
        locations_to_check = set(with_locations.locations)
        if locations_to_check:
            logger.info("Checking locations `%s` for storage `%s`",
                        locations_to_check, storage_row.uuid)
            for location in locations_to_check:
                logger.info("Checking location `%s` for storage `%s`",
                            location, storage_row.uuid)
                if not storage.exists([location], storage_path):
                    location_row = _get_location_row(location)
                    logger.info(
                        "Location `%s` is missing for storage `%s`; removing",
                        location,
                        storage_row.uuid,
                    )
                    (ImageStoragePlacement.delete().where(
                        ImageStoragePlacement.storage == storage_row,
                        ImageStoragePlacement.location == location_row,
                    ).execute())
Example #8
    def _candidates_to_backfill(self):
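        # Yield TagManifestLabel rows that are not yet mapped in TagManifestLabelMap.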
        def missing_tmt_query():
            return (TagManifestLabel.select().join(
                TagManifestLabelMap,
                JOIN.LEFT_OUTER).where(TagManifestLabelMap.id >> None))

        min_id = (TagManifestLabel.select(fn.Min(TagManifestLabel.id)).join(
            TagManifestLabelMap,
            JOIN.LEFT_OUTER).where(TagManifestLabelMap.id >> None).scalar())
        max_id = TagManifestLabel.select(fn.Max(TagManifestLabel.id)).scalar()

        iterator = yield_random_entries(
            missing_tmt_query,
            TagManifestLabel.id,
            100,
            max_id,
            min_id,
        )

        return iterator
Example #9
    def _run_counting(self):
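        # Count yesterday's repository actions, querying only repositories that are still
        # missing an entry, and bail out early once every repository has one.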
        yesterday = date.today() - timedelta(days=1)

        def batch_query():
            return model.repositoryactioncount.missing_counts_query(yesterday)

        min_id = model.repository.get_min_id()
        max_id = model.repository.get_max_id()
        if min_id is None or max_id is None:
            return

        # Compare the number of RAC entries for yesterday to the total number of repos.
        # If they match, there is nothing more to do.
        repo_count = model.repository.get_repository_count()
        rac_count = model.repositoryactioncount.found_entry_count(yesterday)
        if rac_count >= repo_count:
            logger.debug("All RAC entries found; nothing more to do")
            return

        # 3^log10(total) gives us a scalable batch size into the millions.
        batch_size = int(3**log10(max(10, max_id - min_id)))

        iterator = yield_random_entries(
            batch_query,
            database.Repository.id,
            batch_size,
            max_id,
            min_id,
        )

        for candidate, abt, num_remaining in iterator:
            if model.repositoryactioncount.has_repository_action_count(
                    candidate, yesterday):
                abt.set()
                continue

            if not self._count_repository_actions(candidate):
                abt.set()
Example #10
def test_no_work():
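    # yield_random_entries should produce nothing when the underlying query is empty.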
    def create_empty_query():
        return FakeQuery([])

    for _ in yield_random_entries(create_empty_query, FAKE_PK_FIELD, 1, 10):
        assert False, 'There should never be any actual work!'
Example #11
    def perform_indexing(self, start_token=None):
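        # Index every whitelisted manifest that is new, previously failed, or was indexed
        # under a different indexer state, then return a token pointing past the max id.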
        whitelisted_namespaces = self.app.config.get(
            "SECURITY_SCANNER_V4_NAMESPACE_WHITELIST", [])
        try:
            indexer_state = self._secscan_api.state()
        except APIRequestFailure:
            return None

        def eligible_manifests(base_query):
            return (base_query.join(Repository).join(User).where(
                User.username << whitelisted_namespaces))

        min_id = (start_token.min_id if start_token is not None else
                  Manifest.select(fn.Min(Manifest.id)).scalar())
        max_id = Manifest.select(fn.Max(Manifest.id)).scalar()

        if max_id is None or min_id is None or min_id > max_id:
            return None

        def reindex_threshold():
            return datetime.utcnow() - timedelta(
                seconds=self.app.config.get("SECURITY_SCANNER_V4_REINDEX_THRESHOLD")
            )

        # TODO(alecmerdler): Filter out any `Manifests` that are still being uploaded
        def not_indexed_query():
            return (eligible_manifests(
                Manifest.select()).switch(Manifest).join(
                    ManifestSecurityStatus,
                    JOIN.LEFT_OUTER).where(ManifestSecurityStatus.id >> None))

        def index_error_query():
            return (eligible_manifests(Manifest.select()).switch(
                Manifest).join(ManifestSecurityStatus).where(
                    ManifestSecurityStatus.index_status == IndexStatus.FAILED,
                    ManifestSecurityStatus.last_indexed < reindex_threshold(),
                ))

        def needs_reindexing_query(indexer_hash):
            return (eligible_manifests(Manifest.select()).switch(
                Manifest).join(ManifestSecurityStatus).where(
                    ManifestSecurityStatus.indexer_hash != indexer_hash,
                    ManifestSecurityStatus.last_indexed < reindex_threshold(),
                ))

        # 4^log10(total) gives us a scalable batch size into the billions.
        batch_size = int(4**log10(max(10, max_id - min_id)))

        iterator = itertools.chain(
            yield_random_entries(
                not_indexed_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                index_error_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                lambda: needs_reindexing_query(indexer_state.get("state", "")),
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
        )

        for candidate, abt, num_remaining in iterator:
            manifest = ManifestDataType.for_manifest(candidate, None)
            layers = registry_model.list_manifest_layers(
                manifest, self.storage, True)

            logger.debug("Indexing %s/%s@%s",
                         candidate.repository.namespace_user,
                         candidate.repository.name, manifest.digest)

            try:
                (report, state) = self._secscan_api.index(manifest, layers)
            except APIRequestFailure:
                logger.exception(
                    "Failed to perform indexing, security scanner API error")
                return None

            with db_transaction():
                ManifestSecurityStatus.delete().where(
                    ManifestSecurityStatus.manifest == candidate).execute()
                ManifestSecurityStatus.create(
                    manifest=candidate,
                    repository=candidate.repository,
                    error_json=report["err"],
                    index_status=(
                        IndexStatus.FAILED
                        if report["state"] == IndexReportState.Index_Error
                        else IndexStatus.COMPLETED
                    ),
                    indexer_hash=state,
                    indexer_version=IndexerVersion.V4,
                    metadata_json={},
                )

        return ScanToken(max_id + 1)
Example #12
    def _get_manifest_iterator(self,
                               indexer_state,
                               min_id,
                               max_id,
                               batch_size=None,
                               reindex_threshold=None):
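        # Builds the same three-way chained iterator as above, with the batch size and
        # the reindex threshold overridable by the caller.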
        # TODO(alecmerdler): Filter out any `Manifests` that are still being uploaded
        def not_indexed_query():
            return (Manifest.select(Manifest, ManifestSecurityStatus).join(
                ManifestSecurityStatus,
                JOIN.LEFT_OUTER).where(ManifestSecurityStatus.id >> None))

        def index_error_query():
            return (Manifest.select(
                Manifest,
                ManifestSecurityStatus).join(ManifestSecurityStatus).where(
                    ManifestSecurityStatus.index_status == IndexStatus.FAILED,
                    # Fall back to the default threshold when no explicit one is supplied.
                    ManifestSecurityStatus.last_indexed <
                    (reindex_threshold or DEFAULT_SECURITY_SCANNER_V4_REINDEX_THRESHOLD),
                ))

        def needs_reindexing_query(indexer_hash):
            return (Manifest.select(
                Manifest,
                ManifestSecurityStatus).join(ManifestSecurityStatus).where(
                    ManifestSecurityStatus.index_status !=
                    IndexStatus.MANIFEST_UNSUPPORTED,
                    ManifestSecurityStatus.indexer_hash != indexer_hash,
                    ManifestSecurityStatus.last_indexed <
                    (reindex_threshold or DEFAULT_SECURITY_SCANNER_V4_REINDEX_THRESHOLD),
                ))

        # 4^log10(total) gives us a scalable batch size into the billions.
        if not batch_size:
            batch_size = int(4**log10(max(10, max_id - min_id)))

        iterator = itertools.chain(
            yield_random_entries(
                not_indexed_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                index_error_query,
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
            yield_random_entries(
                lambda: needs_reindexing_query(indexer_state.get("state", "")),
                Manifest.id,
                batch_size,
                max_id,
                min_id,
            ),
        )

        return iterator