Example #1
    def _call(self, method, path, params=None, body=None):
        """
        Issues an HTTP request to the security endpoint, handling the logic of using an
        alternative BATCH endpoint for non-GET requests and failover for GET requests.
        """
        timeout = self._config.get("SECURITY_SCANNER_API_TIMEOUT_SECONDS", 1)
        endpoint = self._config["SECURITY_SCANNER_ENDPOINT"]

        with CloseForLongOperation(self._config):
            # If the request isn't a read, attempt to use a batch stack and do not fail over.
            if method != "GET":
                if self._config.get("SECURITY_SCANNER_ENDPOINT_BATCH") is not None:
                    endpoint = self._config["SECURITY_SCANNER_ENDPOINT_BATCH"]
                    timeout = (
                        self._config.get("SECURITY_SCANNER_API_BATCH_TIMEOUT_SECONDS")
                        or timeout
                    )
                return self._request(method, endpoint, path, body, params, timeout)

            # The request is read-only and can failover.
            all_endpoints = [endpoint] + self._config.get(
                "SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS", [])
            return _failover_read_request(
                *[((self._request, endpoint, path, body, params, timeout), {})
                  for endpoint in all_endpoints])
Example #2
    def _finalize_blob_storage(self, app_config):
        """
        When an upload is successful, this ends the uploading process from the storage's perspective.

        Returns True if the blob already existed.
        """
        computed_digest = digest_tools.sha256_digest_from_hashlib(
            self.blob_upload.sha_state)
        final_blob_location = digest_tools.content_path(computed_digest)

        # Close the database connection before we perform this operation, as it can take a while
        # and we shouldn't hold the connection during that time.
        with CloseForLongOperation(app_config):
            # Move the storage into place, or if this was a re-upload, cancel it
            already_existed = self.storage.exists(
                {self.blob_upload.location_name}, final_blob_location)
            if already_existed:
                # It already existed, clean up our upload which served as proof that the
                # uploader had the blob.
                self.storage.cancel_chunked_upload(
                    {self.blob_upload.location_name},
                    self.blob_upload.upload_id,
                    self.blob_upload.storage_metadata,
                )
            else:
                # We were the first ones to upload this image (at least to this location),
                # so copy it into place.
                self.storage.complete_chunked_upload(
                    {self.blob_upload.location_name},
                    self.blob_upload.upload_id,
                    final_blob_location,
                    self.blob_upload.storage_metadata,
                )

        return already_existed
Example #3
    def yield_logs_for_export(
        self,
        start_datetime,
        end_datetime,
        repository_id=None,
        namespace_id=None,
        max_query_time=None,
    ):
        max_query_time = (
            max_query_time.total_seconds() if max_query_time is not None else 300
        )
        search = self._base_query_date_range(start_datetime, end_datetime,
                                             None, repository_id, namespace_id,
                                             None)

        def raise_on_timeout(batch_generator):
            start = time()
            for batch in batch_generator:
                elapsed = time() - start
                if elapsed > max_query_time:
                    logger.error(
                        "Retrieval of logs `%s/%s` timed out with time of `%s`",
                        namespace_id,
                        repository_id,
                        elapsed,
                    )
                    raise LogsIterationTimeout()

                yield batch
                start = time()

        def read_batch(scroll):
            batch = []
            for log in scroll:
                batch.append(log)
                if len(batch) == DEFAULT_RESULT_WINDOW:
                    yield _for_elasticsearch_logs(batch,
                                                  repository_id=repository_id,
                                                  namespace_id=namespace_id)
                    batch = []

            if batch:
                yield _for_elasticsearch_logs(batch,
                                              repository_id=repository_id,
                                              namespace_id=namespace_id)

        search = search.params(size=DEFAULT_RESULT_WINDOW,
                               request_timeout=max_query_time)

        try:
            with CloseForLongOperation(config.app_config):
                for batch in raise_on_timeout(read_batch(search.scan())):
                    yield batch
        except ConnectionTimeout:
            raise LogsIterationTimeout()
Example #4
    def poll_queue(self):
        logger.debug("Getting work item from queue.")

        with self._current_item_lock:
            self.current_queue_item = self._queue.get(
                processing_time=self._reservation_seconds)

        while True:
            # Retrieve the current item in the queue over which to operate. We do so under
            # a lock to make sure we are always retrieving an item when in a healthy state.
            current_queue_item = None
            with self._current_item_lock:
                current_queue_item = self.current_queue_item
                if current_queue_item is None:
                    break

            logger.debug("Queue gave us some work: %s",
                         current_queue_item.body)
            job_details = json.loads(current_queue_item.body)

            try:
                with CloseForLongOperation(app.config):
                    self.process_queue_item(job_details)

                self.mark_current_complete()

            except JobException as jex:
                logger.warning("An error occurred processing request: %s",
                               current_queue_item.body)
                logger.warning("Job exception: %s", jex)
                self.mark_current_incomplete(restore_retry=False)

            except WorkerSleepException as exc:
                logger.debug("Worker has been requested to go to sleep")
                self.mark_current_incomplete(restore_retry=True)
                time.sleep(QUEUE_WORKER_SLEEP_DURATION)

            except WorkerUnhealthyException as exc:
                logger.error(
                    "The worker has encountered an error via the job and will not take new jobs"
                )
                logger.error(str(exc))
                self.mark_current_incomplete(restore_retry=True)
                self._stop.set()

            if not self._stop.is_set():
                with self._current_item_lock:
                    self.current_queue_item = self._queue.get(
                        processing_time=self._reservation_seconds)

        if not self._stop.is_set():
            logger.debug("No more work.")
Example #5
    def lookup_latest_logs(self,
                           performer_name=None,
                           repository_name=None,
                           namespace_name=None,
                           filter_kinds=None,
                           size=20):
        repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
            repository_name, namespace_name, performer_name)

        with CloseForLongOperation(config.app_config):
            latest_logs = self._load_latest_logs(performer_id, repository_id,
                                                 account_id, filter_kinds,
                                                 size)

        return latest_logs
Example #6
    def get_aggregated_log_counts(
        self,
        start_datetime,
        end_datetime,
        performer_name=None,
        repository_name=None,
        namespace_name=None,
        filter_kinds=None,
    ):
        if end_datetime - start_datetime >= timedelta(days=DATE_RANGE_LIMIT):
            raise Exception(
                "Cannot lookup aggregated logs over a period longer than a month"
            )

        repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
            repository_name, namespace_name, performer_name)

        with CloseForLongOperation(config.app_config):
            search = self._base_query_date_range(start_datetime, end_datetime,
                                                 performer_id, repository_id,
                                                 account_id, filter_kinds)
            search.aggs.bucket("by_id", "terms", field="kind_id").bucket(
                "by_date", "date_histogram", field="datetime", interval="day"
            )
            # es returns all buckets when size=0
            search = search.extra(size=0)
            resp = search.execute()

        if not resp.aggregations:
            return []

        counts = []
        by_id = resp.aggregations["by_id"]

        for id_bucket in by_id.buckets:
            for date_bucket in id_bucket.by_date.buckets:
                if date_bucket.doc_count > 0:
                    counts.append(
                        AggregatedLogCount(id_bucket.key,
                                           date_bucket.doc_count,
                                           date_bucket.key))

        return counts
Example #7
    def _call(self, method, path, params=None, body=None, headers=None):
        """
        Issues an HTTP request to the signing service and handles failover for GET requests.
        """
        timeout = self._config.get('TUF_API_TIMEOUT_SECONDS', 1)
        endpoint = self._config['TUF_SERVER']

        with CloseForLongOperation(self._config):
            # If the request isn't a read, do not fail over.
            if method != 'GET':
                return self._request(method, endpoint, path, body, headers,
                                     params, timeout)

            # The request is read-only and can failover.
            all_endpoints = [endpoint] + self._config.get(
                'TUF_READONLY_FAILOVER_ENDPOINTS', [])
            return _failover_read_request(
                *[((self._request, endpoint, path, body, headers, params, timeout), {})
                  for endpoint in all_endpoints])
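
Examples #1 and #7 pass _failover_read_request one (args, kwargs) tuple per endpoint, with the request callable itself as the first positional element, so the helper can retry the same read against each endpoint in turn. The helper's implementation is not shown in these examples; the following is only a minimal sketch of that calling convention, and the ReadFailoverExhaustedException name is invented here for illustration.

class ReadFailoverExhaustedException(Exception):
    # Invented for this sketch: raised when every candidate endpoint has failed.
    pass


def _failover_read_request(*arg_sets):
    # Each positional argument is an (args, kwargs) pair as built by the callers above,
    # where args[0] is the callable that issues the request (e.g. self._request).
    last_error = None
    for args, kwargs in arg_sets:
        request_fn = args[0]
        try:
            return request_fn(*args[1:], **kwargs)
        except Exception as exc:  # The real helper would likely catch a narrower failover error.
            last_error = exc

    raise ReadFailoverExhaustedException(last_error)
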
Example #8
    def _archive_redis_buildlogs(self):
        """
        Archive a single build, choosing a candidate at random.

        This process must be idempotent to avoid needing two-phase commit.
        """
        # Get a random build to archive
        to_archive = model.get_archivable_build()
        if to_archive is None:
            logger.debug("No more builds to archive")
            return

        logger.debug("Archiving: %s", to_archive.uuid)

        length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
        to_encode = {
            "start": 0,
            "total": length,
            "logs": entries,
        }

        if length > 0:
            with CloseForLongOperation(app.config):
                with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
                    with GzipFile("testarchive", fileobj=tempfile) as zipstream:
                        for chunk in StreamingJSONEncoder().iterencode(to_encode):
                            zipstream.write(chunk)

                    tempfile.seek(0)
                    log_archive.store_file(tempfile,
                                           JSON_MIMETYPE,
                                           content_encoding="gzip",
                                           file_id=to_archive.uuid)

        we_updated = model.mark_build_archived(to_archive.uuid)
        if we_updated:
            build_logs.expire_status(to_archive.uuid)
            build_logs.delete_log_entries(to_archive.uuid)
        else:
            logger.debug("Another worker pre-empted us when archiving: %s",
                         to_archive.uuid)
Example #9
    def replicate_storage(self,
                          namespace,
                          storage_uuid,
                          storage,
                          backoff_check=True):
        # Lookup the namespace and its associated regions.
        if not namespace:
            logger.debug(
                "Unknown namespace when trying to replicate storage %s",
                storage_uuid)
            return

        locations = model.user.get_region_locations(namespace)

        # Lookup the image storage.
        try:
            partial_storage = model.storage.get_storage_by_uuid(storage_uuid)
        except model.InvalidImageException:
            logger.debug("Unknown storage: %s", storage_uuid)
            return

        # Check to see if the image is at all the required locations.
        locations_required = locations | set(storage.default_locations)
        locations_missing = locations_required - set(partial_storage.locations)

        logger.debug(
            "For replication of storage %s under namespace %s: %s required; %s missing",
            storage_uuid,
            namespace.username,
            locations_required,
            locations_missing,
        )

        if not locations_missing:
            logger.debug(
                "No missing locations for storage %s under namespace %s. Required: %s",
                storage_uuid,
                namespace.username,
                locations_required,
            )
            return

        # For any missing storage locations, initiate a copy.
        existing_location = list(partial_storage.locations)[0]
        path_to_copy = model.storage.get_layer_path(partial_storage)

        # Lookup and ensure the existing location exists.
        if not self._backoff_check_exists(existing_location, path_to_copy,
                                          storage, backoff_check):
            logger.warning(
                "Cannot find image storage %s in existing location %s; stopping replication",
                storage_uuid,
                existing_location,
            )
            raise JobException()

        # For each missing location, copy over the storage.
        for location in locations_missing:
            logger.debug(
                "Starting copy of storage %s to location %s from %s",
                partial_storage.uuid,
                location,
                existing_location,
            )

            # Copy the binary data.
            copied = False
            try:
                with CloseForLongOperation(app.config):
                    storage.copy_between(path_to_copy, existing_location,
                                         location)
                    copied = True
            except IOError:
                logger.exception(
                    "Failed to copy path `%s` of image storage %s to location %s",
                    path_to_copy,
                    partial_storage.uuid,
                    location,
                )
                raise JobException()
            except:
                logger.exception(
                    "Unknown exception when copying path %s of image storage %s to loc %s",
                    path_to_copy,
                    partial_storage.uuid,
                    location,
                )
                raise WorkerUnhealthyException()

            if copied:
                # Verify the data was copied to the target storage, to ensure that there are no cases
                # where we write the placement without knowing the data is present.
                if not self._backoff_check_exists(location, path_to_copy,
                                                  storage, backoff_check):
                    logger.warning(
                        "Failed to find path `%s` in location `%s` after copy",
                        path_to_copy,
                        location,
                    )
                    raise JobException()

                # Create the storage location record for the storage now that the copy has
                # completed.
                model.storage.add_storage_placement(partial_storage, location)

                logger.debug(
                    "Finished copy of image storage %s to location %s from %s",
                    partial_storage.uuid,
                    location,
                    existing_location,
                )

        logger.debug(
            "Completed replication of image storage %s to locations %s from %s",
            partial_storage.uuid,
            locations_missing,
            existing_location,
        )
Example #10
    def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1):
        """
        Uploads a chunk of data found in the given input file-like interface. start_offset and
        length are optional and should match a range header if any was given.

        Returns the total number of bytes uploaded after this upload has completed. Raises a
        BlobUploadException if the upload failed.
        """
        assert start_offset is not None
        assert length is not None

        if start_offset > 0 and start_offset > self.blob_upload.byte_count:
            logger.error(
                "start_offset provided greater than blob_upload.byte_count")
            raise BlobRangeMismatchException()

        # Ensure that we won't go over the allowed maximum size for blobs.
        max_blob_size = bitmath.parse_string_unsafe(
            self.settings.maximum_blob_size)
        uploaded = bitmath.Byte(length + start_offset)
        if length > -1 and uploaded > max_blob_size:
            raise BlobTooLargeException(uploaded=uploaded.bytes,
                                        max_allowed=max_blob_size.bytes)

        location_set = {self.blob_upload.location_name}
        upload_error = None
        with CloseForLongOperation(app_config):
            if start_offset > 0 and start_offset < self.blob_upload.byte_count:
                # Skip the bytes which were received on a previous push, which are already stored and
                # included in the sha calculation
                overlap_size = self.blob_upload.byte_count - start_offset
                input_fp = StreamSlice(input_fp, overlap_size)

                # Update our upload bounds to reflect the skipped portion of the overlap
                start_offset = self.blob_upload.byte_count
                length = max(length - overlap_size, 0)

            # We use this to escape early in case we have already processed all of the bytes the user
            # wants to upload.
            if length == 0:
                return self.blob_upload.byte_count

            input_fp = wrap_with_handler(input_fp,
                                         self.blob_upload.sha_state.update)

            if self.extra_blob_stream_handlers:
                for handler in self.extra_blob_stream_handlers:
                    input_fp = wrap_with_handler(input_fp, handler)

            # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the
            # stream so we can determine the uncompressed size. We'll throw out this data if another chunk
            # comes in, but in the common case the docker client only sends one chunk.
            size_info = None
            if start_offset == 0 and self.blob_upload.chunk_count == 0:
                size_info, fn = calculate_size_handler()
                input_fp = wrap_with_handler(input_fp, fn)

            start_time = time.time()
            length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
                location_set,
                self.blob_upload.upload_id,
                start_offset,
                length,
                input_fp,
                self.blob_upload.storage_metadata,
                content_type=BLOB_CONTENT_TYPE,
            )

            if upload_error is not None:
                logger.error("storage.stream_upload_chunk returned error %s",
                             upload_error)
                raise BlobUploadException(upload_error)

            # Update the chunk upload time and push bytes metrics.
            chunk_upload_duration.labels(list(location_set)[0]).observe(
                time.time() - start_time)
            pushed_bytes_total.inc(length_written)

        # Ensure we have not gone beyond the max layer size.
        new_blob_bytes = self.blob_upload.byte_count + length_written
        new_blob_size = bitmath.Byte(new_blob_bytes)
        if new_blob_size > max_blob_size:
            raise BlobTooLargeException(uploaded=new_blob_size,
                                        max_allowed=max_blob_size.bytes)

        # If we determined an uncompressed size and this is the first chunk, add it to the blob.
        # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
        uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
        if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
            uncompressed_byte_count = size_info.uncompressed_size
        elif length_written > 0:
            # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
            # know the uncompressed size.
            uncompressed_byte_count = None

        self.blob_upload = registry_model.update_blob_upload(
            self.blob_upload,
            uncompressed_byte_count,
            new_metadata,
            new_blob_bytes,
            self.blob_upload.chunk_count + 1,
            self.blob_upload.sha_state,
        )
        if self.blob_upload is None:
            raise BlobUploadException("Could not complete upload of chunk")

        return new_blob_bytes
Example #11
    def lookup_logs(
        self,
        start_datetime,
        end_datetime,
        performer_name=None,
        repository_name=None,
        namespace_name=None,
        filter_kinds=None,
        page_token=None,
        max_page_count=None,
    ):
        assert start_datetime is not None and end_datetime is not None

        # Check for a valid combined model token when migrating online from a combined model
        if page_token is not None and page_token.get("readwrite_page_token") is not None:
            page_token = page_token.get("readwrite_page_token")

        if page_token is not None and max_page_count is not None:
            page_number = page_token.get("page_number")
            if page_number is not None and page_number + 1 > max_page_count:
                return LogEntriesPage([], None)

        repository_id, account_id, performer_id = DocumentLogsModel._get_ids_by_names(
            repository_name, namespace_name, performer_name)

        after_datetime = None
        after_random_id = None
        if page_token is not None:
            after_datetime = parse_datetime(page_token["datetime"])
            after_random_id = page_token["random_id"]

        if after_datetime is not None:
            end_datetime = min(end_datetime, after_datetime)

        all_logs = []

        with CloseForLongOperation(config.app_config):
            for current_date in _date_range_descending(start_datetime,
                                                       end_datetime):
                try:
                    logs = self._load_logs_for_day(
                        current_date,
                        performer_id,
                        repository_id,
                        account_id,
                        filter_kinds,
                        after_datetime,
                        after_random_id,
                        size=PAGE_SIZE + 1,
                    )

                    all_logs.extend(logs)
                except NotFoundError:
                    continue

                if len(all_logs) > PAGE_SIZE:
                    break

        next_page_token = None
        all_logs = all_logs[0:PAGE_SIZE + 1]

        if len(all_logs) == PAGE_SIZE + 1:
            # The last element in the response is used to check if there's more elements.
            # The second element in the response is used as the pagination token because search_after does
            # not include the exact match, and so the next page will start with the last element.
            # This keeps the behavior exactly the same as table_logs_model, so that
            # the caller can expect when a pagination token is non-empty, there must be
            # at least 1 log to be retrieved.
            next_page_token = {
                "datetime": all_logs[-2].datetime.isoformat(),
                "random_id": all_logs[-2].random_id,
                "page_number": page_token["page_number"] + 1 if page_token else 1,
            }

        return LogEntriesPage(
            _for_elasticsearch_logs(all_logs[:PAGE_SIZE], repository_id,
                                    account_id),
            next_page_token,
        )
Example #12
    def retrieve(self, cache_key, loader, should_cache=is_not_none):
        with CloseForLongOperation(self.app_config):
            return self.cache.retrieve(cache_key, loader, should_cache)
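
Every example above wraps its slow I/O (HTTP requests, storage copies, Elasticsearch scans, cache loads) in CloseForLongOperation so that a pooled database connection is not held open while that work runs. The context manager itself is not part of these examples; the following is only a rough sketch of the idea, where the _close_db_connections helper and the TESTING flag are assumptions made for illustration.

def _close_db_connections():
    # Placeholder: a real implementation would return any open database
    # connections to the pool before the long operation starts.
    pass


class CloseForLongOperation(object):
    # Context manager that releases database connections around a long operation.

    def __init__(self, config_object):
        self.config_object = config_object

    def __enter__(self):
        # Keep connections open under test so transactional fixtures keep working.
        if not self.config_object.get("TESTING", False):
            _close_db_connections()

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing to restore here: callers reconnect lazily on their next database access.
        return False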