def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
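
# A minimal standalone sketch of the SQS handoff above, assuming boto3 directly
# rather than the sqs_queue() helper; the queue name 'broker-job-queue' is
# hypothetical. The Validator consumer reads MessageBody as the job_id and pulls
# 'agency_code' from MessageAttributes.
import boto3

def send_a_generation_message_sketch(job_id, agency_code):
    # Resolve the queue by name (illustrative; the real code uses sqs_queue())
    queue = boto3.resource('sqs').get_queue_by_name(QueueName='broker-job-queue')
    return queue.send_message(MessageBody=str(job_id),
                              MessageAttributes={'agency_code': {'DataType': 'String',
                                                                 'StringValue': agency_code}})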
def job_to_dict(job):
    """ Convert a Job model into a dictionary, ready to be serialized as JSON

        Args:
            job: job to convert into a dictionary

        Returns:
            A dictionary of job information
    """
    sess = GlobalDB.db().session
    job_info = {
        'job_id': job.job_id,
        'job_status': job.job_status_name,
        'job_type': job.job_type_name,
        'filename': job.original_filename,
        'file_size': job.file_size,
        'number_of_rows': job.number_of_rows,
        'file_type': job.file_type_name or ''
    }

    # @todo replace with relationships
    file_results = sess.query(File).filter_by(job_id=job.job_id).one_or_none()
    if file_results is None:
        # Job ID not in the error database; it probably did not make it to validation or has not yet been validated
        job_info.update({
            'file_status': "",
            'error_type': "",
            'error_data': [],
            'warning_data': [],
            'missing_headers': [],
            'duplicated_headers': []
        })
    else:
        # If the job ID was found in the file table, we can get the header error lists and file data. Get the string
        # of missing headers and parse it as a list
        job_info['file_status'] = file_results.file_status_name
        job_info['missing_headers'] = StringCleaner.split_csv(file_results.headers_missing)
        job_info["duplicated_headers"] = StringCleaner.split_csv(file_results.headers_duplicated)
        job_info["error_type"] = get_error_type(job.job_id)
        job_info["error_data"] = get_error_metrics_by_job_id(job.job_id, job.job_type_name == 'validation',
                                                             severity_id=RULE_SEVERITY_DICT['fatal'])
        job_info["warning_data"] = get_error_metrics_by_job_id(job.job_id, job.job_type_name == 'validation',
                                                               severity_id=RULE_SEVERITY_DICT['warning'])
    return job_info
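
# Illustrative only: the shape of the dictionary job_to_dict() returns, with
# hypothetical field values; it serializes directly to JSON.
import json

def job_to_dict_example():
    sample_job_info = {
        'job_id': 1234, 'job_status': 'finished', 'job_type': 'csv_record_validation',
        'filename': 'award_financial.csv', 'file_size': 1048576, 'number_of_rows': 500,
        'file_type': 'C', 'file_status': 'complete', 'missing_headers': [],
        'duplicated_headers': [], 'error_type': 'row_errors', 'error_data': [], 'warning_data': []
    }
    return json.dumps(sample_job_info, indent=4)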
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None, file_format='csv',
                       element_numbers=False):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress
        (if it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
            file_format: determines if the file generated is a txt or a csv
            element_numbers: determines if the alternate headers with FPDS element numbers should be used
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code, file_format, element_numbers)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(
            request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
            file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type,
            file_format=file_format, is_cached_file=True, element_numbers=element_numbers)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id,
                         'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
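
# A plausible sketch (not the actual helper) of the lookup retrieve_cached_file_generation
# performs, inferred from the FileGeneration columns set above: reuse a still-cached
# generation created today for the same window, agency, format, and header style.
def retrieve_cached_file_generation_sketch(sess, job, agency_type, agency_code, file_format, element_numbers):
    return sess.query(FileGeneration).filter_by(
        request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
        file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type,
        file_format=file_format, element_numbers=element_numbers, is_cached_file=True).one_or_none()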
def start_generation_job(job, start_date, end_date, agency_code=None):
    """ Validates the dates for a D file generation job and passes the Job ID to SQS

        Args:
            job: File generation job to start
            start_date: Start date of the file generation
            end_date: End date of the file generation
            agency_code: Agency code for detached D file generations

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    sess = GlobalDB.db().session
    file_type = job.file_type.letter_name
    try:
        if file_type in ['D1', 'D2']:
            # Validate and set Job's start and end dates
            if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
                raise ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
            job.start_date = start_date
            job.end_date = end_date
            sess.commit()
        elif file_type not in ["E", "F"]:
            raise ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
    except ResponseException as e:
        return False, JsonResponse.error(e, e.status, file_type=file_type, status='failed')

    mark_job_status(job.job_id, "waiting")

    # Add job_id to the SQS job queue
    logger.info({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id)
    })
    queue = sqs_queue()

    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}} if agency_code else {}
    response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)
    logger.debug({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Send message response: {}'.format(response)
    })

    return True, None
def generate_detached_file(file_type, cgac_code, frec_code, start, end, quarter, agency_type):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start: start date in a string, formatted MM/DD/YYYY
            end: end date in a string, formatted MM/DD/YYYY
            quarter: quarter to generate for, formatted Q#/YYYY
            agency_type: The type of agency (awarding or funding) to generate the file for

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start, and end.

        Raises:
            ResponseException: if the start and end Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(ValueError("Detached file generation requires CGAC or FR Entity Code"),
                                  StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start or not end:
            return JsonResponse.error(ValueError("Must have a start and end date for D file generation."),
                                      StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start) and StringCleaner.is_date(end)):
            raise ResponseException('Start or end date cannot be parsed into a date', StatusCode.CLIENT_ERROR)

        if agency_type not in ('awarding', 'funding'):
            return JsonResponse.error(ValueError("agency_type must be either awarding or funding."),
                                      StatusCode.CLIENT_ERROR)
    else:
        # Check if date format is Q#/YYYY
        if not quarter:
            return JsonResponse.error(ValueError("Must have a quarter for A file generation."),
                                      StatusCode.CLIENT_ERROR)

        try:
            start, end = generic_helper.quarter_to_dates(quarter)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.add_generation_job_info(file_type_name=file_type_name, start_date=start,
                                                        end_date=end)

    agency_code = frec_code if frec_code else cgac_code
    log_data = {'message': 'Starting detached {} file generation'.format(file_type), 'message_type': 'BrokerInfo',
                'job_id': new_job.job_id, 'file_type': file_type, 'agency_code': agency_code, 'start_date': start,
                'end_date': end}
    logger.info(log_data)

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job, start, end, agency_type, agency_code=agency_code)
        else:
            generation_helper.start_a_generation(new_job, start, end, agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)
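
# Hypothetical caller sketch for the quarter-based variant above; the agency codes
# and dates are illustrative, not real agency data.
def generate_detached_file_quarter_examples():
    # D1/D2 requests supply explicit MM/DD/YYYY dates and an agency_type
    d1_response = generate_detached_file('D1', '020', None, '01/01/2017', '03/31/2017', None, 'awarding')
    # A requests supply a quarter instead; dates are derived via quarter_to_dates
    a_response = generate_detached_file('A', None, '1601', None, None, 'Q2/2017', None)
    return d1_response, a_response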
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress
        (if it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(
            request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
            file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type,
            is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id,
                         'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
def generate_detached_file(file_type, cgac_code, frec_code, start_date, end_date, year, period, agency_type,
                           file_format):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start_date: start date in a string, formatted MM/DD/YYYY
            end_date: end date in a string, formatted MM/DD/YYYY
            year: year to generate for, integer 4 digits
            period: period to generate for, integer (2-12)
            agency_type: The type of agency (awarding or funding) to generate the file for
            file_format: determines if the file generated is a txt or a csv (only used for D file generation)

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start_date, and end_date.

        Raises:
            ResponseException: if the start_date and end_date Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(ValueError("Detached file generation requires CGAC or FR Entity Code"),
                                  StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start_date or not end_date:
            return JsonResponse.error(ValueError('Must have a start and end date for D file generation.'),
                                      StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
            raise ResponseException('Start or end date cannot be parsed into a date', StatusCode.CLIENT_ERROR)

        if agency_type not in ['awarding', 'funding']:
            return JsonResponse.error(ValueError('agency_type must be either awarding or funding.'),
                                      StatusCode.CLIENT_ERROR)

        if file_format not in ['csv', 'txt']:
            return JsonResponse.error(ValueError('file_format must be either csv or txt.'),
                                      StatusCode.CLIENT_ERROR)
    else:
        # Make sure both year and period are provided
        if not (year and period):
            return JsonResponse.error(ValueError("Must have a year and period for A file generation."),
                                      StatusCode.CLIENT_ERROR)
        try:
            # Convert to real start and end dates
            start_date, end_date = generic_helper.year_period_to_dates(year, period)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.create_generation_job(file_type_name, start_date, end_date)

    agency_code = frec_code if frec_code else cgac_code
    logger.info({'message': 'Starting detached {} file generation'.format(file_type), 'message_type': 'BrokerInfo',
                 'job_id': new_job.job_id, 'file_type': file_type, 'agency_code': agency_code,
                 'start_date': start_date, 'end_date': end_date})

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job, start_date, end_date, agency_type,
                                                 agency_code=agency_code, file_format=file_format)
        else:
            generation_helper.start_a_generation(new_job, start_date, end_date, agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)
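
# Hypothetical caller sketch for the year/period variant above; argument values are
# illustrative. A generation derives its dates from year and period via
# year_period_to_dates, while D generation takes explicit dates plus an output format.
def generate_detached_file_period_examples():
    d2_response = generate_detached_file('D2', '020', None, '01/01/2020', '03/31/2020', None, None,
                                         'funding', 'txt')
    a_response = generate_detached_file('A', None, '1601', None, None, 2020, 6, 'awarding', 'csv')
    return d2_response, a_response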
def startGenerationJob(self, submission_id, file_type):
    """ Initiates a file generation job

        Args:
            submission_id: ID of submission to start job for
            file_type: Type of file to be generated

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    jobDb = self.interfaces.jobDb
    file_type_name = self.fileTypeMap[file_type]

    if file_type in ["D1", "D2"]:
        # Populate start and end dates; these should be provided in MM/DD/YYYY format,
        # using calendar year (not fiscal year)
        requestDict = RequestDictionary(self.request)
        start_date = requestDict.getValue("start")
        end_date = requestDict.getValue("end")

        if not (StringCleaner.isDate(start_date) and StringCleaner.isDate(end_date)):
            exc = ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc, exc.status, start="", end="", file_type=file_type, status="failed")
    elif file_type not in ["E", "F"]:
        exc = ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
        return False, JsonResponse.error(exc, exc.status, file_type=file_type, status="failed")

    cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

    # Generate and upload file to S3
    user_id = LoginSession.getName(session)
    timestamped_name = s3UrlHandler.getTimestampedFilename(CONFIG_BROKER["".join([str(file_type_name),
                                                                                  "_file_name"])])
    if self.isLocal:
        upload_file_name = "".join([CONFIG_BROKER['broker_files'], timestamped_name])
    else:
        upload_file_name = "".join([str(user_id), "/", timestamped_name])

    job = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "file_upload")
    job.filename = upload_file_name
    job.original_filename = timestamped_name
    job.job_status_id = jobDb.getJobStatusId("running")
    jobDb.session.commit()

    if file_type in ["D1", "D2"]:
        CloudLogger.log("DEBUG: Adding job info for job id of " + str(job.job_id), log_type="debug",
                        file_name=self.debug_file_name)
        return self.addJobInfoForDFile(upload_file_name, timestamped_name, submission_id, file_type, file_type_name,
                                       start_date, end_date, cgac_code, job)
    elif file_type == 'E':
        generate_e_file.delay(submission_id, job.job_id, InterfaceHolder, timestamped_name, upload_file_name,
                              self.isLocal)
    elif file_type == 'F':
        generate_f_file.delay(submission_id, job.job_id, InterfaceHolder, timestamped_name, upload_file_name,
                              self.isLocal)

    return True, None
def generate_detached_file(file_type, cgac_code, frec_code, start_date, end_date, year, period, agency_type):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start_date: start date in a string, formatted MM/DD/YYYY
            end_date: end date in a string, formatted MM/DD/YYYY
            year: year to generate for, integer 4 digits
            period: period to generate for, integer (2-12)
            agency_type: The type of agency (awarding or funding) to generate the file for

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start_date, and end_date.

        Raises:
            ResponseException: if the start_date and end_date Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(ValueError("Detached file generation requires CGAC or FR Entity Code"),
                                  StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start_date or not end_date:
            return JsonResponse.error(ValueError("Must have a start and end date for D file generation."),
                                      StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
            raise ResponseException('Start or end date cannot be parsed into a date', StatusCode.CLIENT_ERROR)

        if agency_type not in ('awarding', 'funding'):
            return JsonResponse.error(ValueError("agency_type must be either awarding or funding."),
                                      StatusCode.CLIENT_ERROR)
    else:
        # Make sure both year and period are provided
        if not (year and period):
            return JsonResponse.error(ValueError("Must have a year and period for A file generation."),
                                      StatusCode.CLIENT_ERROR)
        try:
            # Convert to real start and end dates
            start_date, end_date = generic_helper.year_period_to_dates(year, period)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.create_generation_job(file_type_name, start_date, end_date)

    agency_code = frec_code if frec_code else cgac_code
    logger.info({'message': 'Starting detached {} file generation'.format(file_type), 'message_type': 'BrokerInfo',
                 'job_id': new_job.job_id, 'file_type': file_type, 'agency_code': agency_code,
                 'start_date': start_date, 'end_date': end_date})

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job, start_date, end_date, agency_type,
                                                 agency_code=agency_code)
        else:
            generation_helper.start_a_generation(new_job, start_date, end_date, agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)