def process_precompiled_letter_notifications(*, letter_data, api_key, template, reply_to_text):
    try:
        status = NOTIFICATION_PENDING_VIRUS_CHECK
        letter_content = base64.b64decode(letter_data['content'])
        pages = pdf_page_count(io.BytesIO(letter_content))
    except ValueError:
        raise BadRequestError(message='Cannot decode letter content (invalid base64 encoding)', status_code=400)
    except PdfReadError:
        current_app.logger.exception(msg='Invalid PDF received')
        raise BadRequestError(message='Letter content is not a valid PDF', status_code=400)

    notification = create_letter_notification(letter_data=letter_data,
                                              template=template,
                                              api_key=api_key,
                                              status=status,
                                              reply_to_text=reply_to_text)

    filename = upload_letter_pdf(notification, letter_content, precompiled=True)
    pages_per_sheet = 2
    notification.billable_units = math.ceil(pages / pages_per_sheet)

    dao_update_notification(notification)

    current_app.logger.info('Calling task scan-file for {}'.format(filename))

    # call task to add the filename to anti virus queue
    notify_celery.send_task(
        name=TaskNames.SCAN_FILE,
        kwargs={'filename': filename},
        queue=QueueNames.ANTIVIRUS,
    )

    return notification
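# Editor's note: a quick standalone check of the billable-units arithmetic above.
# pages_per_sheet = 2 implies double-sided printing, so an odd page count still
# occupies (and bills for) a whole extra sheet:
import math

assert math.ceil(1 / 2) == 1  # 1 page  -> 1 sheet billed
assert math.ceil(3 / 2) == 2  # 3 pages -> 2 sheets billed
assert math.ceil(4 / 2) == 2  # 4 pages -> 2 sheets billed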
def sanitise_letter(self, filename):
    try:
        reference = get_reference_from_filename(filename)
        notification = dao_get_notification_by_reference(reference)

        current_app.logger.info('Notification ID {} Virus scan passed: {}'.format(notification.id, filename))

        if notification.status != NOTIFICATION_PENDING_VIRUS_CHECK:
            current_app.logger.info('Sanitise letter called for notification {} which is in {} state'.format(
                notification.id, notification.status))
            return

        notify_celery.send_task(
            name=TaskNames.SANITISE_LETTER,
            kwargs={
                'notification_id': str(notification.id),
                'filename': filename,
                'allow_international_letters': notification.service.has_permission(
                    INTERNATIONAL_LETTERS
                ),
            },
            queue=QueueNames.SANITISE_LETTERS,
        )
    except Exception:
        try:
            current_app.logger.exception(
                "RETRY: calling sanitise_letter task for notification {} failed".format(notification.id)
            )
            self.retry(queue=QueueNames.RETRY)
        except self.MaxRetriesExceededError:
            message = "RETRY FAILED: Max retries reached. " \
                      "The task sanitise_letter failed for notification {}. " \
                      "Notification has been updated to technical-failure".format(notification.id)
            update_notification_status_by_id(notification.id, NOTIFICATION_TECHNICAL_FAILURE)
            raise NotificationTechnicalFailureException(message)
def create_pdf_for_templated_letter(self, encrypted_letter_data):
    letter_details = current_app.encryption_client.decrypt(encrypted_letter_data)
    current_app.logger.info(f"Creating a pdf for notification with id {letter_details['notification_id']}")
    logo_filename = f'{letter_details["logo_filename"]}.svg' if letter_details['logo_filename'] else None

    template = LetterPrintTemplate(
        letter_details['template'],
        values=letter_details['values'] or None,
        contact_block=letter_details['letter_contact_block'],
        # letter assets are hosted on s3
        admin_base_url=current_app.config['LETTER_LOGO_URL'],
        logo_file_name=logo_filename,
    )
    with current_app.test_request_context(''):
        html = HTML(string=str(template))

    try:
        pdf = BytesIO(html.write_pdf())
    except WeasyprintError as exc:
        self.retry(exc=exc, queue=QueueNames.SANITISE_LETTERS)

    cmyk_pdf = convert_pdf_to_cmyk(pdf)
    page_count = get_page_count(cmyk_pdf.read())
    cmyk_pdf.seek(0)

    try:
        # If the file already exists in S3, it will be overwritten
        if letter_details["key_type"] == "test":
            bucket_name = current_app.config['TEST_LETTERS_BUCKET_NAME']
        else:
            bucket_name = current_app.config['LETTERS_PDF_BUCKET_NAME']

        s3upload(
            filedata=cmyk_pdf,
            region=current_app.config['AWS_REGION'],
            bucket_name=bucket_name,
            file_location=letter_details["letter_filename"],
        )

        current_app.logger.info(
            f"Uploaded letters PDF {letter_details['letter_filename']} to {bucket_name} for "
            f"notification id {letter_details['notification_id']}")
    except BotoClientError:
        current_app.logger.exception(
            f"Error uploading {letter_details['letter_filename']} to pdf bucket "
            f"for notification {letter_details['notification_id']}")
        return

    notify_celery.send_task(
        name=TaskNames.UPDATE_BILLABLE_UNITS_FOR_LETTER,
        kwargs={
            "notification_id": letter_details["notification_id"],
            "page_count": page_count,
        },
        queue=QueueNames.LETTERS
    )
def replay_letters_in_error(filename=None):
    # This method can be used to replay letters that end up in the ERROR directory.
    # We had an incident where clamAV was not processing the virus scan.
    if filename:
        move_error_pdf_to_scan_bucket(filename)
        # call task to add the filename to anti virus queue
        current_app.logger.info("Calling scan_file for: {}".format(filename))

        if current_app.config['ANTIVIRUS_ENABLED']:
            notify_celery.send_task(
                name=TaskNames.SCAN_FILE,
                kwargs={'filename': filename},
                queue=QueueNames.ANTIVIRUS,
            )
        else:
            # stub out antivirus in dev
            sanitise_letter.apply_async([filename], queue=QueueNames.LETTERS)
    else:
        error_files = get_file_names_from_error_bucket()
        for item in error_files:
            moved_file_name = item.key.split('/')[1]
            current_app.logger.info("Calling scan_file for: {}".format(moved_file_name))
            move_error_pdf_to_scan_bucket(moved_file_name)

            # call task to add the filename to anti virus queue
            if current_app.config['ANTIVIRUS_ENABLED']:
                notify_celery.send_task(
                    name=TaskNames.SCAN_FILE,
                    kwargs={'filename': moved_file_name},
                    queue=QueueNames.ANTIVIRUS,
                )
            else:
                # stub out antivirus in dev
                sanitise_letter.apply_async([moved_file_name], queue=QueueNames.LETTERS)
def check_job_status():
    """
    every x minutes do this check
    select
    from jobs
    where job_status == 'in progress'
    and template_type in ('sms', 'email')
    and scheduled_at or created_at is older than 30 minutes.
    if any results then
        raise error
        process the rows in the csv that are missing (in another task) - just do the check here.
    """
    thirty_minutes_ago = datetime.utcnow() - timedelta(minutes=30)
    thirty_five_minutes_ago = datetime.utcnow() - timedelta(minutes=35)

    jobs_not_complete_after_30_minutes = Job.query.filter(
        Job.job_status == JOB_STATUS_IN_PROGRESS,
        and_(thirty_five_minutes_ago < Job.processing_started,
             Job.processing_started < thirty_minutes_ago)
    ).order_by(Job.processing_started).all()

    # temporarily mark them as ERROR so that they don't get picked up by future check_job_status tasks
    # if they haven't been re-processed in time.
    job_ids = []
    for job in jobs_not_complete_after_30_minutes:
        job.job_status = JOB_STATUS_ERROR
        dao_update_job(job)
        job_ids.append(str(job.id))

    if job_ids:
        notify_celery.send_task(
            name=TaskNames.PROCESS_INCOMPLETE_JOBS,
            args=(job_ids,),
            queue=QueueNames.JOBS
        )
        raise JobIncompleteError("Job(s) {} have not completed.".format(job_ids))
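# Editor's note: check_job_status raises *after* queueing the replay so that the
# scheduled run itself is recorded as a failure (and can alert) while the
# process-incomplete-jobs task still goes ahead. JobIncompleteError is assumed to
# be a simple marker exception along these lines:
class JobIncompleteError(Exception):
    """Raised when in-progress jobs have not finished within 30 minutes."""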
def process_precompiled_letter_notifications(*, letter_data, api_key, template, reply_to_text):
    try:
        status = NOTIFICATION_PENDING_VIRUS_CHECK
        letter_content = base64.b64decode(letter_data['content'])
    except ValueError:
        raise BadRequestError(message='Cannot decode letter content (invalid base64 encoding)', status_code=400)

    notification = create_letter_notification(letter_data=letter_data,
                                              template=template,
                                              api_key=api_key,
                                              status=status,
                                              reply_to_text=reply_to_text)

    filename = upload_letter_pdf(notification, letter_content, precompiled=True)

    current_app.logger.info('Calling task scan-file for {}'.format(filename))

    # call task to add the filename to anti virus queue
    if current_app.config['ANTIVIRUS_ENABLED']:
        notify_celery.send_task(
            name=TaskNames.SCAN_FILE,
            kwargs={'filename': filename},
            queue=QueueNames.ANTIVIRUS,
        )
    else:
        # stub out antivirus in dev
        sanitise_letter.apply_async([filename], queue=QueueNames.LETTERS)

    return notification
def collate_letter_pdfs_for_day(date=None):
    if not date:
        # Using the truncated date is ok because UTC to BST does not make a difference to the date,
        # since it is triggered mid afternoon.
        date = datetime.utcnow().strftime("%Y-%m-%d")

    letter_pdfs = sorted(
        s3.get_s3_bucket_objects(current_app.config['LETTERS_PDF_BUCKET_NAME'], subfolder=date),
        key=lambda letter: letter['Key']
    )
    for i, letters in enumerate(group_letters(letter_pdfs)):
        filenames = [letter['Key'] for letter in letters]

        hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
        # eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
        dvla_filename = 'NOTIFY.{date}.{num:03}.{hash}.ZIP'.format(date=date, num=i + 1, hash=hash)

        current_app.logger.info(
            'Calling task zip-and-send-letter-pdfs for {} pdfs to upload {} with total size {:,} bytes'.format(
                len(filenames), dvla_filename, sum(letter['Size'] for letter in letters))
        )
        notify_celery.send_task(
            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
            kwargs={
                'filenames_to_zip': filenames,
                'upload_filename': dvla_filename
            },
            queue=QueueNames.PROCESS_FTP,
            compression='zlib'
        )
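# Editor's note: group_letters is used above but not shown in this section. A
# minimal sketch of the batching it is assumed to perform - yielding lists of
# letter objects so that each zip stays under a size and file-count limit (the
# two MAX_LETTER_PDF_* config keys are assumptions):
from flask import current_app


def group_letters(letter_pdfs):
    running_filesize = 0
    list_of_files = []
    for letter in letter_pdfs:
        too_big = running_filesize + letter['Size'] > current_app.config['MAX_LETTER_PDF_ZIP_FILESIZE']
        too_many = len(list_of_files) >= current_app.config['MAX_LETTER_PDF_COUNT_PER_ZIP']
        if list_of_files and (too_big or too_many):
            # close off the current batch and start a new one
            yield list_of_files
            running_filesize = 0
            list_of_files = []
        running_filesize += letter['Size']
        list_of_files.append(letter)
    if list_of_files:
        yield list_of_files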
def scan_file(self, filename):
    current_app.logger.info('Scanning file: {}'.format(filename))
    try:
        if clamav_scan(BytesIO(_get_letter_pdf(filename))):
            task_name = 'sanitise-letter'
        else:
            task_name = 'process-virus-scan-failed'
            current_app.logger.error('VIRUS FOUND for file: {}'.format(filename))

        current_app.logger.info('Calling task: {} to process {} on API'.format(task_name, filename))
        notify_celery.send_task(
            name=task_name,
            kwargs={'filename': filename},
            queue=QueueNames.LETTERS,
        )
    except (clamd.ClamdError, BotoClientError) as e:
        try:
            current_app.logger.exception("Scanning error file: {} {}".format(filename, e))
            self.retry(queue=QueueNames.ANTIVIRUS)
        except self.MaxRetriesExceededError:
            current_app.logger.exception(
                "MAX RETRY EXCEEDED: Task scan_file failed for file: {}".format(filename))
            notify_celery.send_task(
                name='process-virus-scan-error',
                kwargs={'filename': filename},
                queue=QueueNames.LETTERS,
            )
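# Editor's note: minimal sketches of what _get_letter_pdf and clamav_scan are
# assumed to do; the scan-bucket config key and the clamd_client wiring are
# assumptions. The clamd package's instream() returns a dict such as
# {'stream': ('OK', None)} or {'stream': ('FOUND', 'Eicar-Test-Signature')}.
import clamd
from flask import current_app

clamd_client = clamd.ClamdUnixSocket()  # or clamd.ClamdNetworkSocket(host, port)


def _get_letter_pdf(filename):
    return s3download(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename).read()


def clamav_scan(file_like):
    status, _signature = clamd_client.instream(file_like)['stream']
    return status == 'OK'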
def sanitise_and_upload_letter(notification_id, filename, allow_international_letters=False):
    current_app.logger.info('Sanitising notification with id {}'.format(notification_id))

    try:
        pdf_content = s3download(current_app.config['LETTERS_SCAN_BUCKET_NAME'], filename).read()
        sanitisation_details = sanitise_file_contents(
            pdf_content,
            allow_international_letters=allow_international_letters,
            filename=filename
        )

        # Only files that have failed sanitisation have 'message' in the sanitisation_details dict
        if sanitisation_details.get('message'):
            validation_status = 'failed'
        else:
            validation_status = 'passed'
            file_data = base64.b64decode(sanitisation_details['file'].encode())

            redaction_failed_message = sanitisation_details.get('redaction_failed_message')
            if redaction_failed_message:
                current_app.logger.info(f'{redaction_failed_message} for file {filename}')
                copy_redaction_failed_pdf(filename)

            # If the file already exists in S3, it will be overwritten
            s3upload(
                filedata=file_data,
                region=current_app.config['AWS_REGION'],
                bucket_name=current_app.config['SANITISED_LETTER_BUCKET_NAME'],
                file_location=filename,
            )

        current_app.logger.info('Notification {} sanitisation: {}'.format(validation_status, notification_id))
    except BotoClientError:
        current_app.logger.exception(
            "Error downloading {} from scan bucket or uploading to sanitise bucket for notification {}".format(
                filename, notification_id))
        return

    sanitise_data = {
        'page_count': sanitisation_details['page_count'],
        'message': sanitisation_details['message'],
        'invalid_pages': sanitisation_details['invalid_pages'],
        'validation_status': validation_status,
        'filename': filename,
        'notification_id': notification_id,
        'address': sanitisation_details['recipient_address']
    }
    encrypted_data = current_app.encryption_client.encrypt(sanitise_data)

    notify_celery.send_task(
        name=TaskNames.PROCESS_SANITISED_LETTER,
        args=(encrypted_data,),
        queue=QueueNames.LETTERS
    )
def trigger_letter_pdfs_for_day():
    letter_pdfs_count = dao_get_count_of_letters_to_process_for_date()
    if letter_pdfs_count:
        notify_celery.send_task(
            name='collate-letter-pdfs-for-day',
            args=(date.today().strftime("%Y-%m-%d"),),
            queue=QueueNames.LETTERS
        )
    current_app.logger.info(
        "{} letter pdfs to be processed by {} task".format(letter_pdfs_count, 'collate-letter-pdfs-for-day'))
def test_send_task_injects_global_request_id_into_kwargs(mocker, app):
    super_apply = mocker.patch('celery.Celery.send_task')

    with app.app_context():
        g.request_id = '1234'
        notify_celery.send_task('some-task')

    super_apply.assert_called_with('some-task', None, {'request_id': '1234'})
def run_letter_jobs():
    job_ids = dao_get_letter_job_ids_by_status(JOB_STATUS_READY_TO_SEND)
    if job_ids:
        notify_celery.send_task(
            name=TaskNames.DVLA_JOBS,
            args=(job_ids,),
            queue=QueueNames.PROCESS_FTP
        )
        current_app.logger.info(
            "Queued {} ready letter job ids onto {}".format(len(job_ids), QueueNames.PROCESS_FTP))
def collate_letter_pdfs_to_be_sent():
    """
    Finds all letters which are still waiting to be sent to DVLA for printing

    This would usually be run at 5.50pm and collect up letters created before 5:30pm today
    that have not yet been sent.
    If run after midnight, it will collect up letters created before 5:30pm the day before.
    """
    current_app.logger.info("starting collate-letter-pdfs-to-be-sent")
    print_run_date = convert_utc_to_bst(datetime.utcnow())
    if print_run_date.time() < LETTER_PROCESSING_DEADLINE:
        print_run_date = print_run_date - timedelta(days=1)

    print_run_deadline = print_run_date.replace(hour=17, minute=30, second=0, microsecond=0)

    _get_letters_and_sheets_volumes_and_send_to_dvla(print_run_deadline)

    for postage in POSTAGE_TYPES:
        current_app.logger.info(
            f"starting collate-letter-pdfs-to-be-sent processing for postage class {postage}")
        letters_to_print = get_key_and_size_of_letters_to_be_sent_to_print(print_run_deadline, postage)

        for i, letters in enumerate(group_letters(letters_to_print)):
            filenames = [letter['Key'] for letter in letters]
            service_id = letters[0]['ServiceId']

            hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
            # eg NOTIFY.2018-12-31.001.Wjrui5nAvObjPd-3GEL-.ZIP
            dvla_filename = 'NOTIFY.{date}.{postage}.{num:03}.{hash}.{service_id}.ZIP'.format(
                date=print_run_deadline.strftime("%Y-%m-%d"),
                postage=RESOLVE_POSTAGE_FOR_FILE_NAME[postage],
                num=i + 1,
                hash=hash,
                service_id=service_id
            )

            current_app.logger.info(
                'Calling task zip-and-send-letter-pdfs for {} pdfs to upload {} with total size {:,} bytes'.format(
                    len(filenames), dvla_filename, sum(letter['Size'] for letter in letters))
            )
            notify_celery.send_task(
                name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
                kwargs={
                    'filenames_to_zip': filenames,
                    'upload_filename': dvla_filename
                },
                queue=QueueNames.PROCESS_FTP,
                compression='zlib'
            )
        current_app.logger.info(
            f"finished collate-letter-pdfs-to-be-sent processing for postage class {postage}")
    current_app.logger.info("finished collate-letter-pdfs-to-be-sent")
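# Editor's note: a standalone worked example of the zip-filename fingerprint used
# above. The first 20 urlsafe-base64 characters of the SHA-512 digest of the
# concatenated keys give a short, filesystem-safe hash that changes whenever the
# set of files in a batch changes (the example key below is illustrative only):
from base64 import urlsafe_b64encode
from hashlib import sha512

filenames = ['2018-12-31/NOTIFY.REF1.D.2.C.20181231120000.PDF']
hash = urlsafe_b64encode(sha512(''.join(filenames).encode()).digest())[:20].decode()
print(hash)  # 20 characters drawn from [A-Za-z0-9_-]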
def test_send_task_injects_id_into_kwargs_from_request(mocker, app):
    super_apply = mocker.patch('celery.Celery.send_task')
    request_id_header = app.config['NOTIFY_TRACE_ID_HEADER']
    request_headers = {request_id_header: '1234'}

    with app.test_request_context(headers=request_headers):
        notify_celery.send_task('some-task')

    super_apply.assert_called_with('some-task', None, {'request_id': '1234'})
def test_send_task_injects_request_id_with_positional_args(mocker, notify_antivirus):
    super_apply = mocker.patch('celery.Celery.send_task')
    g.request_id = '1234'

    notify_celery.send_task('some-task', ['args'], {'kw': 'args'})

    super_apply.assert_called_with('some-task', ['args'], {'request_id': '1234', 'kw': 'args'})
def test_send_task_injects_request_id_with_other_kwargs(mocker, notify_antivirus):
    super_apply = mocker.patch('celery.Celery.send_task')
    g.request_id = '1234'

    notify_celery.send_task('some-task', kwargs={'something': 'else'})

    super_apply.assert_called_with('some-task', None, {'request_id': '1234', 'something': 'else'})
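# Editor's note: the four request-id tests above exercise a send_task override on
# the notify_celery app. A minimal sketch consistent with those assertions (the
# class name and fallback logic are assumptions, not the project's confirmed
# implementation):
from celery import Celery
from flask import current_app, g, has_app_context, has_request_context, request


class NotifyCelery(Celery):
    def send_task(self, name, args=None, kwargs=None, **other_kwargs):
        kwargs = kwargs or {}
        # prefer an id stored on flask.g; otherwise fall back to the trace-id header
        if has_app_context() and getattr(g, 'request_id', None):
            kwargs['request_id'] = g.request_id
        elif has_request_context():
            header_name = current_app.config['NOTIFY_TRACE_ID_HEADER']
            kwargs['request_id'] = request.headers.get(header_name)
        return super().send_task(name, args, kwargs, **other_kwargs)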
def collate_letter_pdfs_for_day(date):
    letter_pdfs = s3.get_s3_bucket_objects(current_app.config['LETTERS_PDF_BUCKET_NAME'], subfolder=date)
    for letters in group_letters(letter_pdfs):
        filenames = [letter['Key'] for letter in letters]
        current_app.logger.info(
            'Calling task zip-and-send-letter-pdfs for {} pdfs of total size {:,} bytes'.format(
                len(filenames), sum(letter['Size'] for letter in letters))
        )
        notify_celery.send_task(
            name=TaskNames.ZIP_AND_SEND_LETTER_PDFS,
            kwargs={'filenames_to_zip': filenames},
            queue=QueueNames.PROCESS_FTP,
            compression='zlib'
        )
def get_pdf_for_templated_letter(self, notification_id):
    try:
        notification = get_notification_by_id(notification_id, _raise=True)
        letter_filename = get_letter_pdf_filename(
            reference=notification.reference,
            crown=notification.service.crown,
            sending_date=notification.created_at,
            dont_use_sending_date=notification.key_type == KEY_TYPE_TEST,
            postage=notification.postage
        )
        letter_data = {
            'letter_contact_block': notification.reply_to_text,
            'template': {
                "subject": notification.template.subject,
                "content": notification.template.content,
                "template_type": notification.template.template_type
            },
            'values': notification.personalisation,
            'logo_filename': notification.service.letter_branding and notification.service.letter_branding.filename,
            'letter_filename': letter_filename,
            "notification_id": str(notification_id),
            'key_type': notification.key_type
        }

        encrypted_data = encryption.encrypt(letter_data)

        notify_celery.send_task(
            name=TaskNames.CREATE_PDF_FOR_TEMPLATED_LETTER,
            args=(encrypted_data,),
            queue=QueueNames.SANITISE_LETTERS
        )
    except Exception:
        try:
            current_app.logger.exception(
                f"RETRY: calling create-letter-pdf task for notification {notification_id} failed"
            )
            self.retry(queue=QueueNames.RETRY)
        except self.MaxRetriesExceededError:
            message = f"RETRY FAILED: Max retries reached. " \
                      f"The task create-letter-pdf failed for notification id {notification_id}. " \
                      f"Notification has been updated to technical-failure"
            update_notification_status_by_id(notification_id, NOTIFICATION_TECHNICAL_FAILURE)
            raise NotificationTechnicalFailureException(message)
def update_notifications(task_name, references):
    # split up references into 1000 item sublists to ensure we don't go over SQS's max item size of 256kb
    for notification_references in chunk_list(references, 1000):
        notify_celery.send_task(
            name=task_name,
            args=(notification_references,),
            queue=NOTIFY_QUEUE
        )
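# Editor's note: chunk_list is used above but not shown here; a minimal sketch of
# the assumed helper, yielding successive fixed-size slices of a sequence:
def chunk_list(items, n):
    for i in range(0, len(items), n):
        yield items[i:i + n]


# e.g. list(chunk_list(['a', 'b', 'c'], 2)) == [['a', 'b'], ['c']]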
def send_letter_jobs():
    job_ids = validate(request.get_json(), letter_job_ids)

    notify_celery.send_task(name=TaskNames.DVLA_JOBS,
                            args=(job_ids['job_ids'],),
                            queue=QueueNames.PROCESS_FTP)

    return jsonify(data={"response": "Task created to send files to DVLA"}), 201