Пример #1
0
def test_new_envelope_enforces_daily_limit():
    """Envelope.new_envelope raises ValueError once an envelope id ending in
    9999 already exists."""
    # Seed an envelope whose id already ends in 9999.
    factories.EnvelopeFactory.create(envelope_id="249999")

    expected_args = (
        "Cannot create more than 9999 Envelopes on a single year.",
    )
    with pytest.raises(ValueError) as exc_info:
        Envelope.new_envelope()

    assert exc_info.value.args == expected_args
Пример #2
0
def test_next_envelope_id_overflow():
    """
    Verify next_envelope_id raises when the counter cannot grow any further.

    The counter part of the id has 4 digits, so 9999 envelopes can be created
    in a year; asking for one more id should raise a ValueError.
    """
    last_id_of_year = "239999"

    with freezegun.freeze_time("2023-01-01"):
        envelope = EnvelopeFactory.create(envelope_id=last_id_of_year)
        assert envelope.envelope_id == last_id_of_year

        with pytest.raises(ValueError):
            Envelope.next_envelope_id()
Пример #3
0
def test_new_envelope_populates_envelope_id():
    """Check that successive Envelope.new_envelope calls get incrementing envelope_ids."""
    # Three envelopes are created because (per the original author's note) the
    # first envelope in a year takes a different code path to the ones that
    # follow it; this exercises ids incrementing in both cases.
    for expected_id in ("300001", "300002", "300003"):
        envelope = Envelope.new_envelope()
        assert envelope.envelope_id == expected_id
Пример #4
0
    def handle(self, *args, **options):
        """
        Render workbasket transactions to envelope files and validate each one.

        Options read from ``options``:

        - ``workbasket_ids``: when truthy, only these workbaskets are used,
          otherwise all workbaskets with status APPROVED.
        - ``envelope_id``: a list; ``["auto"]`` asks Envelope for the next id,
          otherwise the first element is used as the envelope id.
        - ``disable_splitting`` / ``max_envelope_size``: control envelope
          splitting; a size of 0 (or no size) disables splitting.
        - ``directory``: passed to ``dit_file_generator``, defaults to ".".

        Exits through ``sys.exit`` with a message when there are no
        workbaskets or no transactions, and with status 1 when any rendered
        envelope contains no transactions.
        """
        workbasket_ids = options.get("workbasket_ids")
        if workbasket_ids:
            query = dict(id__in=workbasket_ids)
        else:
            query = dict(status=WorkflowStatus.APPROVED)

        workbaskets = WorkBasket.objects.filter(**query)
        if not workbaskets:
            sys.exit(
                "Nothing to upload:  No workbaskets with status APPROVED.")

        # transactions:  will be serialized, then added to an envelope for upload.
        transactions = workbaskets.ordered_transactions()

        if not transactions:
            sys.exit(
                f"Nothing to upload:  {workbaskets.count()} Workbaskets APPROVED but none contain any transactions.",
            )

        if options.get("envelope_id") == ["auto"]:
            envelope_id = int(Envelope.next_envelope_id())
        else:
            envelope_id = int(options.get("envelope_id")[0])

        # Setting max_envelope_size to 0 also disables splitting, so normalise
        # 0 (and a missing value) to None.  The previous form,
        # int(value or None), raised TypeError for exactly those inputs.
        if options.get("disable_splitting"):
            max_envelope_size = None
        else:
            max_envelope_size = int(options.get("max_envelope_size") or 0) or None

        directory = options.get("directory", ".")

        output_file_constructor = dit_file_generator(directory, envelope_id)
        serializer = MultiFileEnvelopeTransactionSerializer(
            output_file_constructor,
            envelope_id=envelope_id,
            max_envelope_size=max_envelope_size,
        )
        errors = False
        for time_to_render, rendered_envelope in item_timer(
                serializer.split_render_transactions(transactions), ):
            envelope_file = rendered_envelope.output
            if not rendered_envelope.transactions:
                self.stdout.write(
                    f"{envelope_file.name} {WARNING_SIGN_EMOJI}  is empty !", )
                errors = True
            else:
                # Rewind so validation reads the envelope from the start.
                envelope_file.seek(0, os.SEEK_SET)
                try:
                    validate_envelope(envelope_file)
                except etree.DocumentInvalid:
                    # NOTE(review): an invalid envelope is reported but does not
                    # set `errors`, so it does not affect the exit status -
                    # confirm whether that is intended.
                    self.stdout.write(
                        f"{envelope_file.name} {WARNING_SIGN_EMOJI}️ Envelope invalid:",
                    )
                else:
                    total_transactions = len(rendered_envelope.transactions)
                    self.stdout.write(
                        f"{envelope_file.name} \N{WHITE HEAVY CHECK MARK}  XML valid.  {total_transactions} transactions, serialized in {time_to_render:.2f} seconds using {envelope_file.tell()} bytes.",
                    )
        if errors:
            sys.exit(1)
Пример #5
0
def upload_and_create_envelopes(
    workbaskets: QuerySet,
    rendered_envelopes: Sequence[RenderedTransactions],
    first_envelope_id,
) -> UploadTaskResultData:
    """
    Upload Envelope data to s3 and create artifacts in the database.

    Side effects on success:
    Create Envelope, EnvelopeTransaction and Upload objects in the database and upload envelope XML to an S3 object.

    :param workbaskets: Workbaskets whose status is set to SENT after each envelope is saved.
    :param rendered_envelopes: Rendered envelopes, processed in the order given.
    :param first_envelope_id: Envelope id, comparable to ``int(envelope.envelope_id)``, expected for the
        first envelope created; each later envelope is expected to take the following id.
    :raises RaceCondition: If a newly created Envelope does not have the expected id.
    :return: :class:`~exporter.util.UploadTaskResultData`.
    """
    # upload_status holds data to pass to the next Task, including messages to the user.
    upload_status = UploadTaskResultData()

    expected_envelope_id = first_envelope_id
    for rendered_envelope in rendered_envelopes:
        envelope = Envelope.new_envelope()
        actual_envelope_id = int(envelope.envelope_id)
        if expected_envelope_id != actual_envelope_id:
            # Format arguments are passed individually: a single tuple argument
            # does not satisfy two placeholders and the log record fails to format.
            logger.error(
                "Envelope created out of sequence: %s != %i this may be due to simultaneous updates causing a race "
                "condition.",
                expected_envelope_id,
                actual_envelope_id,
            )
            raise RaceCondition(
                f"Envelope out of sequence: {envelope.envelope_id} != {expected_envelope_id}",
            )
        # The next envelope created should take the following id; without advancing
        # the expectation, every batch with more than one envelope fails the check.
        expected_envelope_id = actual_envelope_id + 1

        envelope_transactions = [
            EnvelopeTransaction(order=order, envelope=envelope, transaction=transaction)
            for order, transaction in enumerate(rendered_envelope.transactions)
        ]
        EnvelopeTransaction.objects.bulk_create(envelope_transactions)
        envelope.save()

        # Read the rendered XML once and reuse the bytes for both file content and checksum.
        rendered_envelope.output.seek(0, os.SEEK_SET)
        envelope_content = rendered_envelope.output.read()
        content_file = ContentFile(envelope_content)
        upload = Upload()
        upload.envelope = envelope
        upload.file = content_file
        upload.checksum = md5(envelope_content).hexdigest()

        upload.file.save(upload.filename, content_file)
        upload_status.add_upload_pk(upload.pk)

        logger.info("Workbasket saved to CDS S3 bucket")
        # NOTE(review): this updates every workbasket after each envelope, not once
        # after all envelopes are uploaded - confirm that is intended.
        workbaskets.update(status=WorkflowStatus.SENT)

        logger.debug("Uploaded: %s", upload.filename)
        upload_status.add_envelope_messages(
            envelope.envelope_id,
            [f"Uploaded {upload.filename}"],
        )
    return upload_status
Пример #6
0
def upload_workbaskets() -> Tuple[bool, Optional[Dict[Union[str, None], str]]]:
    """
    Render, validate and upload workbaskets with status READY_FOR_EXPORT.

    :return: ``(success, messages)`` where messages is a dict of user messages
        keyed by envelope_id, or by None for messages not tied to an envelope.
    """
    workbaskets = WorkBasket.objects.filter(
        status=WorkflowStatus.READY_FOR_EXPORT,
    )
    if not workbaskets:
        no_workbaskets_msg = (
            "Nothing to upload:  No workbaskets with status READY_FOR_EXPORT."
        )
        logger.info(no_workbaskets_msg)
        return False, {None: no_workbaskets_msg}

    # These transactions get serialized and then placed in envelopes for upload.
    ordered_transactions = workbaskets.ordered_transactions()
    if not ordered_transactions:
        no_transactions_msg = f"Nothing to upload:  {workbaskets.count()} Workbaskets READY_FOR_EXPORT but none contain any transactions."
        logger.info(no_transactions_msg)
        return False, {None: no_transactions_msg}

    starting_envelope_id = int(Envelope.next_envelope_id())

    # Everything is rendered into a temporary directory first so that all of
    # the envelopes can be validated before any of them are uploaded.
    with tempfile.TemporaryDirectory(prefix="dit-tamato_") as scratch_directory:
        serializer = MultiFileEnvelopeTransactionSerializer(
            dit_file_generator(scratch_directory, starting_envelope_id),
            envelope_id=starting_envelope_id,
            max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
        )
        rendered_envelopes = [
            *serializer.split_render_transactions(ordered_transactions),
        ]

        # Collect one message per envelope that failed validation.
        error_messages = {}
        failures = validate_rendered_envelopes(rendered_envelopes)
        for envelope_id, exception in failures.items():
            error_messages[
                envelope_id
            ] = f"Envelope {envelope_id:06} was invalid {exception}"

        if error_messages:
            return False, error_messages

        # Every envelope validated, so upload them and report back.
        return True, upload_and_create_envelopes(
            workbaskets,
            rendered_envelopes,
            starting_envelope_id,
        )
Пример #7
0
def upload_and_create_envelopes(
    workbaskets: QuerySet,
    rendered_envelopes: Sequence[RenderedTransactions],
    first_envelope_id,
) -> Dict[Union[int, None], str]:
    """
    Create database objects for each rendered envelope and upload its XML.

    For every rendered envelope an Envelope, its EnvelopeTransactions and an
    Upload are created, the file is saved, HMRC is notified unless
    ``settings.EXPORTER_DISABLE_NOTIFICATION`` is set, and the workbaskets are
    set to SENT_TO_CDS.

    :param workbaskets: Workbaskets whose status is updated after each envelope.
    :param rendered_envelopes: Rendered envelopes, processed in the order given.
    :param first_envelope_id: Envelope id, comparable to ``int(envelope.envelope_id)``, expected for the
        first envelope created; each later envelope is expected to take the following id.
    :raises RaceCondition: If a newly created Envelope does not have the expected id.
    :return: User messages keyed by ``envelope.envelope_id``.
    """
    # {envelope_id: message} User messages can be returned to the caller of the task.
    user_messages = {}
    expected_envelope_id = first_envelope_id
    for rendered_envelope in rendered_envelopes:
        envelope = Envelope.new_envelope()
        actual_envelope_id = int(envelope.envelope_id)
        if expected_envelope_id != actual_envelope_id:
            # TODO consider locking the table for writes instead
            # Format arguments are passed individually: a single tuple argument
            # does not satisfy two placeholders and the log record fails to format.
            logger.error(
                "Envelope created out of sequence: %s != %s this may due to simultaneous updates causing a race condition.",
                expected_envelope_id,
                actual_envelope_id,
            )
            raise RaceCondition(
                f"Envelope out of sequence: {envelope.envelope_id} != {expected_envelope_id}",
            )
        # The next envelope created should take the following id; without advancing
        # the expectation, every batch with more than one envelope fails the check.
        expected_envelope_id = actual_envelope_id + 1

        envelope_transactions = [
            EnvelopeTransaction(order=order,
                                envelope=envelope,
                                transaction=transaction)
            for order, transaction in enumerate(rendered_envelope.transactions)
        ]
        EnvelopeTransaction.objects.bulk_create(envelope_transactions)
        envelope.save()

        # Read the rendered XML once and reuse the bytes for both file content and checksum.
        rendered_envelope.output.seek(0, os.SEEK_SET)
        envelope_content = rendered_envelope.output.read()
        content_file = ContentFile(envelope_content)
        upload = Upload()
        upload.envelope = envelope
        upload.file = content_file
        upload.checksum = md5(envelope_content).hexdigest()

        upload.file.save(upload.filename, content_file)
        if settings.EXPORTER_DISABLE_NOTIFICATION:
            logger.info("HMRC notification disabled.")
        else:
            logger.info("Notify HMRC of upload, %s", upload.filename)
            upload.notify_hmrc()  # sets notification_sent

        logger.info("Workbasket sent to CDS")
        # NOTE(review): this updates every workbasket after each envelope, not once
        # after all envelopes are uploaded - confirm that is intended.
        workbaskets.update(status=WorkflowStatus.SENT_TO_CDS)

        logger.debug("Uploaded: %s", upload.filename)
        user_messages[envelope.envelope_id] = f"Uploaded {upload.filename}"
    return user_messages
Пример #8
0
    def handle(self, *args, **options):
        """
        Render READY_FOR_EXPORT workbasket transactions to envelope files and
        report on each one.

        Reads the ``envelope_id`` option (falling back to the next envelope id
        from Envelope when it is None) and the ``directory`` option (default
        "."). Exits through ``sys.exit`` with a message when there is nothing
        to render, and with status 1 when any rendered envelope is empty.
        """
        workbaskets = WorkBasket.objects.filter(status=WorkflowStatus.READY_FOR_EXPORT)
        if not workbaskets:
            sys.exit("Nothing to upload:  No workbaskets with status READY_FOR_EXPORT.")

        # These transactions get serialized and split across envelope files.
        ordered_transactions = workbaskets.ordered_transactions()
        if not ordered_transactions:
            sys.exit(
                f"Nothing to upload:  {workbaskets.count()} Workbaskets READY_FOR_EXPORT but none contain any transactions.",
            )

        requested_id = options.get("envelope_id")
        if requested_id is None:
            requested_id = Envelope.next_envelope_id()
        envelope_id = int(requested_id)

        target_directory = options.get("directory", ".")
        serializer = MultiFileEnvelopeTransactionSerializer(
            dit_file_generator(target_directory, envelope_id),
            envelope_id=envelope_id,
            max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
        )

        found_empty_envelope = False
        for rendered_envelope in serializer.split_render_transactions(
            ordered_transactions,
        ):
            envelope_file = rendered_envelope.output

            if not rendered_envelope.transactions:
                self.stdout.write(
                    f"{envelope_file.name} {WARNING_SIGN_EMOJI}  is empty !",
                )
                found_empty_envelope = True
                continue

            # Rewind so validation reads the envelope from the start.
            envelope_file.seek(0, os.SEEK_SET)
            try:
                validate_envelope(envelope_file)
            except etree.DocumentInvalid:
                self.stdout.write(
                    f"{envelope_file.name} {WARNING_SIGN_EMOJI}️ Envelope invalid:",
                )
                continue

            total_transactions = len(rendered_envelope.transactions)
            self.stdout.write(
                f"{envelope_file.name} \N{WHITE HEAVY CHECK MARK}  XML valid.  {total_transactions} transactions in {envelope_file.tell()} bytes.",
            )

        if found_empty_envelope:
            sys.exit(1)
Пример #9
0
def test_transaction_envelope_serializer_splits_output():
    """
    Check that MultiFileEnvelopeTransactionSerializer renders the transactions
    it is given, one envelope at a time, including their record codes.

    Splitting at a realistic 40mb would make for a slow test, so the size
    limit is set to 7k instead: small enough for the serializer to start a new
    envelope after just one transaction.
    """
    size_limit = 7000

    # Populate one workbasket with three transactions holding different data.
    workbasket = ApprovedWorkBasketFactory.create()
    with ApprovedTransactionFactory.create(workbasket=workbasket) as tx1:
        factories.RegulationFactory.create()

    with ApprovedTransactionFactory.create(workbasket=workbasket) as tx2:
        factories.RegulationFactory.create(regulation_group=None)
        factories.FootnoteTypeFactory.create()

    with ApprovedTransactionFactory.create(workbasket=workbasket) as tx3:
        factories.FootnoteTypeFactory.create()

    created_transactions = [tx1, tx2, tx3]
    transactions = Transaction.objects.filter(
        pk__in=[tx.pk for tx in created_transactions],
    )
    expected_transactions = [
        Transaction.objects.filter(pk=tx.pk) for tx in created_transactions
    ]
    expected_record_codes = []
    for tx in created_transactions:
        codes = [
            (model.record_code, model.subrecord_code)
            for model in tx.tracked_models.all()
        ]
        expected_record_codes.append(codes)

    # One in-memory buffer per expected envelope; the serializer is handed a
    # callable that returns the next unused buffer each time it is called.
    expected_outputs = [io.BytesIO() for _ in range(3)]
    unused_outputs = iter(expected_outputs)

    serializer = MultiFileEnvelopeTransactionSerializer(
        lambda: next(unused_outputs),
        envelope_id=int(Envelope.next_envelope_id()),
        max_envelope_size=size_limit,
    )

    rendered_envelopes = serializer.split_render_transactions(transactions)
    for index, rendered_envelope in enumerate(rendered_envelopes):
        # Each item yielded is expected to be a RenderedTransactions.
        assert isinstance(rendered_envelope, RenderedTransactions)

        assert rendered_envelope.output == expected_outputs[index]
        assert rendered_envelope.is_oversize is False
        assert 0 < rendered_envelope.output.tell() < size_limit

        assert len(
            rendered_envelope.transactions,
        ), "Serializer should skip empty transactions, they cause XSD validation to fail."

        assert sorted(rendered_envelope.transactions) == sorted(
            expected_transactions[index],
        )

        # Check the record codes present in the rendered XML.
        output_xml = etree.XML(rendered_envelope.output.getvalue())
        output_record_codes = set(taric_xml_record_codes(output_xml))

        # TODO - it would be good to check the output more thoroughly than just the record code.
        # The template generates some record codes itself, hence issuperset rather than equality.
        assert output_record_codes.issuperset(expected_record_codes[index])
Пример #10
0
def test_next_envelope_id(year, first_envelope_id, next_envelope_id):
    """Check the id generated after an existing envelope: two digits of the
    year followed by a 4 digit counter that starts from 0001."""
    frozen_date = f"{year}-01-01"
    with freezegun.freeze_time(frozen_date):
        existing_envelope = EnvelopeFactory.create(envelope_id=first_envelope_id)
        assert existing_envelope

        generated_id = Envelope.next_envelope_id()
        assert generated_id == next_envelope_id
Пример #11
0
def upload_workbasket_envelopes(self, upload_status_data) -> Dict:
    """
    Render APPROVED workbaskets into envelopes, validate them and upload them.

    :param upload_status_data: Keyword data used to construct the
        :class:`~exporter.util.UploadTaskResultData` that feedback is added to.
    :return: dict form of the :class:`~exporter.util.UploadTaskResultData`, carrying
        user readable feedback on task status.
    """
    upload_status = UploadTaskResultData(**upload_status_data)
    workbaskets = WorkBasket.objects.filter(status=WorkflowStatus.APPROVED)

    if not workbaskets:
        no_workbaskets_msg = "Nothing to upload:  No workbaskets with status APPROVED."
        logger.info(no_workbaskets_msg)
        return dict(upload_status.add_messages([no_workbaskets_msg]))

    # These transactions get serialized and then placed in envelopes for upload.
    ordered_transactions = workbaskets.ordered_transactions()

    if not ordered_transactions:
        no_transactions_msg = f"Nothing to upload:  {workbaskets.count()} Workbaskets APPROVED but none contain any transactions."
        logger.info(no_transactions_msg)
        return dict(upload_status.add_messages([no_transactions_msg]))

    starting_envelope_id = int(Envelope.next_envelope_id())

    # The envelope XML goes to temporary files first, so it can be validated
    # before any database objects are created or anything is uploaded to s3.
    with tempfile.TemporaryDirectory(prefix="dit-tamato_") as scratch_directory:
        serializer = MultiFileEnvelopeTransactionSerializer(
            dit_file_generator(scratch_directory, starting_envelope_id),
            envelope_id=starting_envelope_id,
            max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
        )

        rendered_envelopes = [
            *serializer.split_render_transactions(ordered_transactions),
        ]

        envelope_errors = validate_rendered_envelopes(rendered_envelopes)
        if envelope_errors:
            messages_by_envelope = exceptions_as_messages(envelope_errors)
            for envelope_id, errors in messages_by_envelope.items():
                upload_status.add_envelope_errors(envelope_id, errors)
            return dict(upload_status)

        # The envelope XML validated, so upload it to s3 and create the
        # matching database objects.
        #
        # User feedback is collected in a :class:`~exporter.util.UploadTaskResultData`
        # and converted to a dict so it can be returned in the Celery task result.
        try:
            upload_result = upload_and_create_envelopes(
                workbaskets,
                rendered_envelopes,
                starting_envelope_id,
            )
            upload_status.update(upload_result)
            return dict(upload_status)
        except ConnectionError as e:
            # Connection issue during upload: re-raise unless retries are configured.
            if not settings.EXPORTER_UPLOAD_MAX_RETRIES:
                raise
            logger.info(
                "%s uploading attempting to upload envelope. endpoint: %s error: %s",
                type(e),
                e.kwargs.get("endpoint_url"),
                e.kwargs.get("error"),
            )
            self.retry()