Example #1
import base64
from datetime import datetime, timedelta

# Project-local imports; the exact module paths are assumptions and depend on
# the surrounding codebase.
from immuni_common.models.enums import TransmissionRiskLevel
from immuni_common.models.mongoengine import TemporaryExposureKey
from immuni_exposure_ingestion.models.batch_file import BatchFile


def batch_file() -> BatchFile:
    """Create and persist a three-key BatchFile fixture covering the last day."""
    batch = BatchFile(
        index=1,
        keys=[
            TemporaryExposureKey(
                key_data=base64.b64encode(
                    "first_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.low,
                rolling_start_number=1,
            ),
            TemporaryExposureKey(
                key_data=base64.b64encode(
                    "second_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.low,
                rolling_start_number=2,
            ),
            TemporaryExposureKey(
                key_data=base64.b64encode(
                    "third_key".encode("utf-8")).decode("utf-8"),
                transmission_risk_level=TransmissionRiskLevel.highest,
                rolling_start_number=3,
            ),
        ],
        period_start=datetime.utcnow() - timedelta(days=1),
        period_end=datetime.utcnow(),
        sub_batch_count=2,
        sub_batch_index=1,
        client_content=b"this_is_a_zip_file",
    )
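    # Persist the fixture; BatchFile is assumed to be a MongoEngine-style
    # document, so save() writes it straight to the database.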
    batch.save()
    return batch
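
Since the fixture base64-encodes the raw key bytes, reading them back requires
a decode. A minimal sketch of a test exercising the fixture (the test name and
assertions are illustrative additions, not part of the original example):

def test_batch_file_fixture() -> None:
    batch = batch_file()
    decoded = [base64.b64decode(key.key_data) for key in batch.keys]
    assert decoded == [b"first_key", b"second_key", b"third_key"]
    assert batch.sub_batch_index <= batch.sub_batch_count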

from typing import List

from bson import ObjectId
from croniter import croniter

# _LOGGER, config, the Upload model, lock_concurrency, and the helpers
# extract_keys_with_risk_level_from_upload and batch_to_sdk_zip_file are
# assumed to be provided by the surrounding codebase.


async def _process_uploads() -> None:
    """
    This task runs once every "period". The period is defined in the
    configuration and can be thought of as 6, 12, or 24 hours.

    The task creates a batch file from the unprocessed uploads of the same
    period and performs some minor validations.

    If the number of keys in the given uploads exceeds the maximum number of
    keys allowed in a single batch, the task creates multiple batches and
    groups them into "sub-batches" instead.
    """

    _LOGGER.info("About to start processing uploads.")
    # Acquire a lock on Redis before processing anything, to avoid concurrent
    # runs of this task.
    async with lock_concurrency("process_uploads"):
        _LOGGER.info("Obtained lock.")

        infos = BatchFile.get_latest_info()
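        # get_latest_info() is assumed to return the (period_end, index) pair
        # of the most recent batch, or None when no batch exists yet (handled
        # below).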
        now = datetime.utcnow()

        if infos:
            last_period, last_index = infos
        else:
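            # croniter.get_prev() returns a float Unix timestamp by default,
            # hence the datetime.fromtimestamp() conversion. With, e.g.,
            # BATCH_PERIODICITY_CRONTAB = "0 0 * * *" (an assumed daily
            # schedule), this resolves to the most recent midnight.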
            last_period = datetime.fromtimestamp(
                croniter(config.BATCH_PERIODICITY_CRONTAB).get_prev())
            last_index = 0

        period_start = last_period
        period_end = now

        _LOGGER.info(
            "Starting to process uploads.",
            extra=dict(period_start=period_start, period_end=period_end),
        )

        uploads = Upload.to_process()
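        # to_process() is assumed to return the queryset of uploads not yet
        # included in any batch.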

        _LOGGER.info("Uploads have been fetched.",
                     extra=dict(n_uploads=uploads.count()))

        processed_uploads: List[ObjectId] = []
        keys: List[TemporaryExposureKey] = []
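        # Greedily fold uploads into the batch, stopping before the key count
        # would exceed the per-batch maximum; leftover uploads are presumably
        # picked up by a later run. processed_uploads collects the IDs of the
        # uploads consumed here (marking them as processed happens outside
        # this excerpt).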
        for upload in uploads:
            if (reached :=
                    len(keys) + len(upload.keys)) > config.MAX_KEYS_PER_BATCH:
                _LOGGER.warning(
                    "Early stop: reached maximum number of keys per batch.",
                    extra=dict(pre_reached=len(keys),
                               reached=reached,
                               max=config.MAX_KEYS_PER_BATCH),
                )
                break
            keys += extract_keys_with_risk_level_from_upload(upload)
            processed_uploads.append(upload.id)

        if (n_keys := len(keys)) > 0:
            # Sort the keys. Since the key data is random, sorting effectively
            # shuffles them, so that keys from the same device are no more
            # likely than any others to end up next to each other.
            keys = sorted(keys, key=lambda x: x.key_data)

            index = last_index + 1

            batch_file = BatchFile(
                index=index,
                keys=keys,
                period_start=period_start,
                period_end=period_end,
                sub_batch_index=1,
                sub_batch_count=1,
            )
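            # batch_to_sdk_zip_file presumably serializes the batch into the
            # zip format the mobile SDK downloads (compare the fixture's
            # placeholder client_content above).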
            batch_file.client_content = batch_to_sdk_zip_file(batch_file)
            batch_file.save()
            _LOGGER.info("Created new batch.",
                         extra=dict(index=index, n_keys=n_keys))
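
The snippet guards the whole task with a lock_concurrency async context
manager so that overlapping runs cannot process the same uploads twice. Its
implementation is not shown here; a minimal sketch of what such a helper could
look like, assuming redis-py's asyncio client (the Redis URL, key prefix, and
timeouts are illustrative assumptions, not the project's actual values):

from contextlib import asynccontextmanager
from typing import AsyncIterator

import redis.asyncio as aioredis


@asynccontextmanager
async def lock_concurrency(name: str) -> AsyncIterator[None]:
    # A fresh client per invocation keeps the sketch self-contained; a real
    # implementation would likely reuse a shared connection pool.
    client = aioredis.Redis.from_url("redis://localhost:6379")  # assumed URL
    try:
        # redis-py Lock objects support "async with"; blocking_timeout=0 makes
        # a second instance fail immediately instead of queueing behind the
        # first, and timeout caps how long the lock can be held.
        async with client.lock(f"lock:{name}", timeout=3600, blocking_timeout=0):
            yield
    finally:
        await client.close()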