def test_new_envelope_enforces_daily_limit():
    """
    Envelope.new_envelope must refuse to go past envelope id "249999".

    An envelope with id "249999" is created up front; the following call to
    Envelope.new_envelope() is expected to raise ValueError carrying the
    message checked below.
    """
    factories.EnvelopeFactory.create(envelope_id="249999")

    expected_args = ("Cannot create more than 9999 Envelopes on a single year.",)
    with pytest.raises(ValueError) as exc_info:
        Envelope.new_envelope()

    assert exc_info.value.args == expected_args
def test_next_envelope_id_overflow():
    """
    Check that the envelope counter cannot run past 9999.

    The counter contains 4 digits, so 9999 envelopes can be created in a
    year; attempting to obtain a further id should raise a ValueError.
    """
    last_id_of_year = "239999"

    with freezegun.freeze_time("2023-01-01"):
        envelope = EnvelopeFactory.create(envelope_id=last_id_of_year)
        assert envelope.envelope_id == "239999"

        with pytest.raises(ValueError):
            Envelope.next_envelope_id()
def test_new_envelope_populates_envelope_id():
    """Verify Envelope.new_envelope correctly populates envelope_id."""
    # Three envelopes are created in a row: according to the original author's
    # note, the first envelope in a year is numbered by different logic from
    # the ones that follow, so this checks that ids increment in both cases.
    for expected_id in ("300001", "300002", "300003"):
        assert Envelope.new_envelope().envelope_id == expected_id
def handle(self, *args, **options):
    """
    Render workbasket transactions to envelope files and validate each one.

    Options read from ``options``:

    - ``workbasket_ids``: when truthy, only workbaskets with these ids are
      used; otherwise workbaskets with status APPROVED are selected.
    - ``envelope_id``: a one-element list; ``["auto"]`` asks
      ``Envelope.next_envelope_id()`` for the id, anything else is converted
      with ``int``.
    - ``disable_splitting``: when truthy, no maximum envelope size is passed
      to the serializer.
    - ``max_envelope_size``: maximum envelope size; 0 or a missing value is
      normalised to ``None``.
    - ``directory``: passed to ``dit_file_generator``, defaults to ``"."``.

    A line is written to ``self.stdout`` for every rendered envelope.  The
    process exits through ``sys.exit`` with a message when there is nothing
    to render, and with status 1 when any rendered envelope is empty.
    """
    workbasket_ids = options.get("workbasket_ids")
    if workbasket_ids:
        query = dict(id__in=workbasket_ids)
    else:
        query = dict(status=WorkflowStatus.APPROVED)

    workbaskets = WorkBasket.objects.filter(**query)
    if not workbaskets:
        sys.exit("Nothing to upload: No workbaskets with status APPROVED.")

    # transactions: will be serialized, then written out as envelope files.
    transactions = workbaskets.ordered_transactions()
    if not transactions:
        sys.exit(
            f"Nothing to upload: {workbaskets.count()} Workbaskets APPROVED but none contain any transactions.",
        )

    if options.get("envelope_id") == ["auto"]:
        envelope_id = int(Envelope.next_envelope_id())
    else:
        envelope_id = int(options.get("envelope_id")[0])

    # A max_envelope_size of 0 (or no value at all) also disables splitting,
    # so both are normalised to None.  The value is defaulted to 0 *before*
    # int() is applied: the previous `int(value or None)` ended up calling
    # int(None) and raised TypeError for exactly those inputs.
    if options.get("disable_splitting"):
        max_envelope_size = None
    else:
        max_envelope_size = int(options.get("max_envelope_size") or 0) or None

    directory = options.get("directory", ".")

    output_file_constructor = dit_file_generator(directory, envelope_id)
    serializer = MultiFileEnvelopeTransactionSerializer(
        output_file_constructor,
        envelope_id=envelope_id,
        max_envelope_size=max_envelope_size,
    )

    errors = False
    for time_to_render, rendered_envelope in item_timer(
        serializer.split_render_transactions(transactions),
    ):
        envelope_file = rendered_envelope.output
        if not rendered_envelope.transactions:
            self.stdout.write(
                f"{envelope_file.name} {WARNING_SIGN_EMOJI} is empty !",
            )
            errors = True
        else:
            # Rewind so validation reads the file from the beginning.
            envelope_file.seek(0, os.SEEK_SET)
            try:
                validate_envelope(envelope_file)
            except etree.DocumentInvalid:
                # NOTE(review): an invalid envelope is reported but does not
                # set `errors`, so it does not affect the exit status -
                # confirm whether that is intended.
                self.stdout.write(
                    f"{envelope_file.name} {WARNING_SIGN_EMOJI}️ Envelope invalid:",
                )
            else:
                total_transactions = len(rendered_envelope.transactions)
                self.stdout.write(
                    f"{envelope_file.name} \N{WHITE HEAVY CHECK MARK} XML valid. {total_transactions} transactions, serialized in {time_to_render:.2f} seconds using {envelope_file.tell()} bytes.",
                )

    if errors:
        sys.exit(1)
def upload_and_create_envelopes(
    workbaskets: QuerySet,
    rendered_envelopes: Sequence[RenderedTransactions],
    first_envelope_id,
) -> UploadTaskResultData:
    """
    Upload Envelope data to s3 and create artifacts in the database.

    Side effects on success:
    Create Envelope, EnvelopeTransaction and Upload objects in the database
    and upload envelope XML to an S3 object (through the storage behind
    ``Upload.file``).  The status of ``workbaskets`` is updated to SENT.

    :param workbaskets: queryset of the workbaskets being exported.
    :param rendered_envelopes: rendered envelopes, in envelope id order; each
        provides ``transactions`` and a readable, seekable ``output``.
    :param first_envelope_id: integer envelope id the first rendered envelope
        was generated with.
    :raises RaceCondition: if an Envelope created here does not receive the
        next id in the sequence starting at ``first_envelope_id``.
    :return: :class:`~exporter.util.UploadTaskResultData`.
    """
    # upload_status holds data to pass to the next Task, including messages to the user.
    upload_status = UploadTaskResultData()

    expected_envelope_id = first_envelope_id
    for rendered_envelope in rendered_envelopes:
        envelope = Envelope.new_envelope()
        actual_envelope_id = int(envelope.envelope_id)
        if expected_envelope_id != actual_envelope_id:
            # The values are passed as separate logging arguments: a single
            # tuple cannot fill two placeholders and the record would fail to
            # format.
            logger.error(
                "Envelope created out of sequence: %s != %i this may be due to simultaneous updates causing a race "
                "condition.",
                expected_envelope_id,
                actual_envelope_id,
            )
            raise RaceCondition(
                f"Envelope out of sequence: {envelope.envelope_id} != {expected_envelope_id}",
            )
        # Each new Envelope takes the next id, so the id expected on the next
        # iteration is one higher.  Without advancing it, every export that
        # was split into more than one envelope would be reported as a race.
        expected_envelope_id = actual_envelope_id + 1

        envelope_transactions = [
            EnvelopeTransaction(order=order, envelope=envelope, transaction=transaction)
            for order, transaction in enumerate(rendered_envelope.transactions)
        ]
        EnvelopeTransaction.objects.bulk_create(envelope_transactions)
        envelope.save()

        # Read the rendered XML once and use it for both the stored file and
        # its checksum.
        rendered_envelope.output.seek(0, os.SEEK_SET)
        envelope_data = rendered_envelope.output.read()
        content_file = ContentFile(envelope_data)

        upload = Upload()
        upload.envelope = envelope
        upload.file = content_file
        upload.checksum = md5(envelope_data).hexdigest()

        upload.file.save(upload.filename, content_file)
        upload_status.add_upload_pk(upload.pk)

        logger.info("Workbasket saved to CDS S3 bucket")
        workbaskets.update(status=WorkflowStatus.SENT)

        logger.debug("Uploaded: %s", upload.filename)
        upload_status.add_envelope_messages(
            envelope.envelope_id,
            [f"Uploaded {upload.filename}"],
        )
    return upload_status
def upload_workbaskets() -> Tuple[bool, Optional[Dict[Union[str, None], str]]]:
    """
    Upload workbaskets.

    Workbaskets with status READY_FOR_EXPORT are serialized into envelopes in
    a temporary directory and validated; only when every envelope is valid
    are they handed to upload_and_create_envelopes.

    Returns a bool for success and dict of user messages keyed by envelope_id
    or None.
    """
    workbaskets = WorkBasket.objects.filter(
        status=WorkflowStatus.READY_FOR_EXPORT,
    )
    if not workbaskets:
        notice = "Nothing to upload: No workbaskets with status READY_FOR_EXPORT."
        logger.info(notice)
        return False, {None: notice}

    # These transactions get serialized and then placed in envelopes for upload.
    transactions = workbaskets.ordered_transactions()
    if not transactions:
        notice = f"Nothing to upload: {workbaskets.count()} Workbaskets READY_FOR_EXPORT but none contain any transactions."
        logger.info(notice)
        return False, {None: notice}

    first_envelope_id = int(Envelope.next_envelope_id())

    # Files go to a temporary directory so all of them can be validated
    # before anything is uploaded.
    with tempfile.TemporaryDirectory(prefix="dit-tamato_") as temporary_directory:
        serializer = MultiFileEnvelopeTransactionSerializer(
            dit_file_generator(temporary_directory, first_envelope_id),
            envelope_id=first_envelope_id,
            max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
        )
        rendered_envelopes = list(
            serializer.split_render_transactions(transactions),
        )

        invalid_envelopes = validate_rendered_envelopes(rendered_envelopes)
        error_messages = {}
        for envelope_id, exception in invalid_envelopes.items():
            error_messages[envelope_id] = (
                f"Envelope {envelope_id:06} was invalid {exception}"
            )
        if error_messages:
            return False, error_messages

        # Every envelope validated: upload them and create the database records.
        user_messages = upload_and_create_envelopes(
            workbaskets,
            rendered_envelopes,
            first_envelope_id,
        )
        return True, user_messages
def upload_and_create_envelopes(
    workbaskets: QuerySet,
    rendered_envelopes: Sequence[RenderedTransactions],
    first_envelope_id,
) -> Dict[Union[int, None], str]:
    """
    Store rendered envelopes, notify HMRC and mark the workbaskets as sent.

    For every rendered envelope an Envelope, its EnvelopeTransaction rows and
    an Upload (with the envelope XML and its md5 checksum) are created.
    Unless ``settings.EXPORTER_DISABLE_NOTIFICATION`` is set,
    ``upload.notify_hmrc()`` is called.  The status of ``workbaskets`` is
    updated to SENT_TO_CDS.

    :param workbaskets: queryset of the workbaskets being exported.
    :param rendered_envelopes: rendered envelopes, in envelope id order; each
        provides ``transactions`` and a readable, seekable ``output``.
    :param first_envelope_id: integer envelope id the first rendered envelope
        was generated with.
    :raises RaceCondition: if an Envelope created here does not receive the
        next id in the sequence starting at ``first_envelope_id``.
    :return: ``{envelope_id: message}`` user messages that can be returned to
        the caller of the task.
    """
    user_messages = {}

    expected_envelope_id = first_envelope_id
    for rendered_envelope in rendered_envelopes:
        envelope = Envelope.new_envelope()
        actual_envelope_id = int(envelope.envelope_id)
        if expected_envelope_id != actual_envelope_id:
            # TODO consider locking the table for writes instead
            # The values are passed as separate logging arguments: a single
            # tuple cannot fill two placeholders and the record would fail to
            # format.
            logger.error(
                "Envelope created out of sequence: %s != %s this may due to simultaneous updates causing a race condition.",
                expected_envelope_id,
                actual_envelope_id,
            )
            raise RaceCondition(
                f"Envelope out of sequence: {envelope.envelope_id} != {expected_envelope_id}",
            )
        # Each new Envelope takes the next id, so the id expected on the next
        # iteration is one higher.  Without advancing it, every export that
        # was split into more than one envelope would be reported as a race.
        expected_envelope_id = actual_envelope_id + 1

        envelope_transactions = [
            EnvelopeTransaction(order=order, envelope=envelope, transaction=transaction)
            for order, transaction in enumerate(rendered_envelope.transactions)
        ]
        EnvelopeTransaction.objects.bulk_create(envelope_transactions)
        envelope.save()

        # Read the rendered XML once and use it for both the stored file and
        # its checksum.
        rendered_envelope.output.seek(0, os.SEEK_SET)
        envelope_data = rendered_envelope.output.read()
        content_file = ContentFile(envelope_data)

        upload = Upload()
        upload.envelope = envelope
        upload.file = content_file
        upload.checksum = md5(envelope_data).hexdigest()

        upload.file.save(upload.filename, content_file)

        if settings.EXPORTER_DISABLE_NOTIFICATION:
            logger.info("HMRC notification disabled.")
        else:
            logger.info("Notify HMRC of upload, %s", upload.filename)
            upload.notify_hmrc()  # sets notification_sent

        logger.info("Workbasket sent to CDS")
        workbaskets.update(status=WorkflowStatus.SENT_TO_CDS)

        logger.debug("Uploaded: %s", upload.filename)
        user_messages[envelope.envelope_id] = f"Uploaded {upload.filename}"
    return user_messages
def handle(self, *args, **options):
    """
    Render READY_FOR_EXPORT workbasket transactions to envelope files and validate them.

    Reads ``envelope_id`` (optional; when it is None the id comes from
    ``Envelope.next_envelope_id()``) and ``directory`` (default ``"."``) from
    ``options``.  One line per rendered envelope is written to
    ``self.stdout``.  Exits through ``sys.exit`` with a message when there is
    nothing to render, and with status 1 when any rendered envelope is empty.
    """
    workbaskets = WorkBasket.objects.filter(status=WorkflowStatus.READY_FOR_EXPORT)
    if not workbaskets:
        sys.exit("Nothing to upload: No workbaskets with status READY_FOR_EXPORT.")

    # These transactions get serialized and written out as envelope files.
    transactions = workbaskets.ordered_transactions()
    if not transactions:
        sys.exit(
            f"Nothing to upload: {workbaskets.count()} Workbaskets READY_FOR_EXPORT but none contain any transactions.",
        )

    if options.get("envelope_id") is None:
        envelope_id = int(Envelope.next_envelope_id())
    else:
        envelope_id = int(options.get("envelope_id"))

    target_directory = options.get("directory", ".")

    serializer = MultiFileEnvelopeTransactionSerializer(
        dit_file_generator(target_directory, envelope_id),
        envelope_id=envelope_id,
        max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
    )

    found_empty_envelope = False
    for rendered in serializer.split_render_transactions(transactions):
        out_file = rendered.output

        if not rendered.transactions:
            self.stdout.write(
                f"{out_file.name} {WARNING_SIGN_EMOJI} is empty !",
            )
            found_empty_envelope = True
            continue

        # Rewind so validation reads the file from the beginning.
        out_file.seek(0, os.SEEK_SET)
        try:
            validate_envelope(out_file)
        except etree.DocumentInvalid:
            self.stdout.write(
                f"{out_file.name} {WARNING_SIGN_EMOJI}️ Envelope invalid:",
            )
            continue

        total_transactions = len(rendered.transactions)
        self.stdout.write(
            f"{out_file.name} \N{WHITE HEAVY CHECK MARK} XML valid. {total_transactions} transactions in {out_file.tell()} bytes.",
        )

    if found_empty_envelope:
        sys.exit(1)
def test_transaction_envelope_serializer_splits_output():
    """
    Verify MultiFileEnvelopeTransactionSerializer outputs the tracked_models
    passed to it and generates records for descriptions.

    This test is a bit artificial: testing 40mb splitting would be
    inefficient, so max_envelope_size is set to 7k, small enough to trigger
    envelope splitting after just one transaction.
    """
    # Populate one workbasket with three transactions holding different kinds of data.
    approved_workbasket = ApprovedWorkBasketFactory.create()
    with ApprovedTransactionFactory.create(workbasket=approved_workbasket) as tx1:
        factories.RegulationFactory.create()
    with ApprovedTransactionFactory.create(workbasket=approved_workbasket) as tx2:
        factories.RegulationFactory.create(regulation_group=None)
        factories.FootnoteTypeFactory.create()
    with ApprovedTransactionFactory.create(workbasket=approved_workbasket) as tx3:
        factories.FootnoteTypeFactory.create()

    created = [tx1, tx2, tx3]
    transactions = Transaction.objects.filter(pk__in=[tx1.pk, tx2.pk, tx3.pk])
    expected_transactions = [Transaction.objects.filter(pk=tx.pk) for tx in created]

    expected_record_codes = []
    for tx in created:
        expected_record_codes.append(
            [
                (tracked_model.record_code, tracked_model.subrecord_code)
                for tracked_model in tx.tracked_models.all()
            ],
        )

    # Static buffers to write to; the constructor given to the serializer
    # hands them out one at a time, in order.
    expected_outputs = [io.BytesIO(), io.BytesIO(), io.BytesIO()]
    pending_outputs = iter(expected_outputs)

    def next_output():
        return next(pending_outputs)

    serializer = MultiFileEnvelopeTransactionSerializer(
        next_output,
        envelope_id=int(Envelope.next_envelope_id()),
        max_envelope_size=7000,
    )

    rendered_envelopes = serializer.split_render_transactions(transactions)
    for index, rendered_envelope in enumerate(rendered_envelopes):
        # Base assumption is that this yields RenderedTransactions
        assert isinstance(rendered_envelope, RenderedTransactions)
        assert rendered_envelope.output == expected_outputs[index]
        assert rendered_envelope.is_oversize is False
        assert 0 < rendered_envelope.output.tell() < 7000
        assert len(
            rendered_envelope.transactions,
        ), "Serializer should skip empty transactions, they cause XSD validation to fail."
        assert sorted(rendered_envelope.transactions) == sorted(
            expected_transactions[index],
        )

        # Verify the XML output
        output_xml = etree.XML(rendered_envelope.output.getvalue())
        output_record_codes = set(taric_xml_record_codes(output_xml))

        # TODO - it would be good to check the output more thoroughly than just the record code.
        # Some record codes are generated in the template, so only a superset can be asserted here.
        assert output_record_codes.issuperset(expected_record_codes[index])
def test_next_envelope_id(year, first_envelope_id, next_envelope_id):
    """
    Verify that envelope ID is made up of two digits of the year and a 4
    digit counter starting from 0001.

    With the clock frozen on 1 January of ``year`` and an envelope with
    ``first_envelope_id`` already created, Envelope.next_envelope_id() must
    return ``next_envelope_id``.
    """
    frozen_date = f"{year}-01-01"
    with freezegun.freeze_time(frozen_date):
        existing_envelope = EnvelopeFactory.create(envelope_id=first_envelope_id)
        assert existing_envelope

        generated_id = Envelope.next_envelope_id()
        assert generated_id == next_envelope_id
def upload_workbasket_envelopes(self, upload_status_data) -> Dict:
    """
    Upload workbaskets.

    APPROVED workbaskets are serialized to envelope XML in a temporary
    directory and validated; when every envelope is valid they are passed to
    upload_and_create_envelopes.  A ConnectionError raised during that step
    triggers ``self.retry()`` when ``settings.EXPORTER_UPLOAD_MAX_RETRIES`` is
    truthy and is re-raised otherwise.

    :param upload_status_data: keyword data used to build the
        :class:`~exporter.util.UploadTaskResultData` that collects feedback.
    :return :class:`~exporter.util.UploadTaskResultData`: object with user
        readable feedback on task status, converted with ``dict``.
    """
    upload_status = UploadTaskResultData(**upload_status_data)

    workbaskets = WorkBasket.objects.filter(status=WorkflowStatus.APPROVED)
    if not workbaskets:
        notice = "Nothing to upload: No workbaskets with status APPROVED."
        logger.info(notice)
        return dict(upload_status.add_messages([notice]))

    # These transactions get serialized and then placed in envelopes for upload.
    transactions = workbaskets.ordered_transactions()
    if not transactions:
        notice = f"Nothing to upload: {workbaskets.count()} Workbaskets APPROVED but none contain any transactions."
        logger.info(notice)
        return dict(upload_status.add_messages([notice]))

    first_envelope_id = int(Envelope.next_envelope_id())

    # The envelope XML goes to temporary files so it can be validated before
    # anything is created in the database or uploaded to s3.
    with tempfile.TemporaryDirectory(prefix="dit-tamato_") as temporary_directory:
        serializer = MultiFileEnvelopeTransactionSerializer(
            dit_file_generator(temporary_directory, first_envelope_id),
            envelope_id=first_envelope_id,
            max_envelope_size=settings.EXPORTER_MAXIMUM_ENVELOPE_SIZE,
        )
        rendered_envelopes = list(serializer.split_render_transactions(transactions))

        envelope_errors = validate_rendered_envelopes(rendered_envelopes)
        if envelope_errors:
            messages_by_envelope = exceptions_as_messages(envelope_errors)
            for envelope_id, errors in messages_by_envelope.items():
                upload_status.add_envelope_errors(envelope_id, errors)
            return dict(upload_status)

        # All envelope XML is valid at this point: upload it to s3 and create
        # the corresponding database objects.
        #
        # User feedback is collected in a :class:`~exporter.util.UploadTaskResultData`
        # and converted to a dict so it can safely be returned in the Celery task result.
        try:
            upload_result = upload_and_create_envelopes(
                workbaskets,
                rendered_envelopes,
                first_envelope_id,
            )
            upload_status.update(upload_result)
            return dict(upload_status)
        except ConnectionError as e:
            # Connection issue during upload.
            if not settings.EXPORTER_UPLOAD_MAX_RETRIES:
                raise
            logger.info(
                "%s uploading attempting to upload envelope.  endpoint: %s error: %s",
                type(e),
                e.kwargs.get("endpoint_url"),
                e.kwargs.get("error"),
            )
            self.retry()