def test_timeout(self, mock_creation_date, mock_log_error):
    """The timeout property flips from False to True across the 36-hour boundary."""
    def backdate(delta):
        # Mock the request's creation date as `delta` in the past.
        mock_creation_date.return_value = date.to_string(
            date.get_datetime_now() - delta)

    # Just under the limit: no timeout.
    backdate(timedelta(hours=35, minutes=59))
    self.assertFalse(self.request_tracker.timeout)

    # Just past the limit: timeout, and the error is logged once.
    backdate(timedelta(hours=36, minutes=1))
    self.assertTrue(self.request_tracker.timeout)
    mock_log_error.assert_called_once()
def create_data_version_table_entry(self, version: int):
    """
    Put a new item in the Data Version table responsible for describing the
    current and previous Redshift data versions for a deployment.

    If the new version already exists, it will be overwritten by the new entry.

    :param version: Version number to create
    """
    cell_counts = V1ApiHandler().describe_filter(
        "project.provenance.document_id")['cell_counts']
    # Key the supported schema versions by the enum member's string value.
    schema_versions = {
        schema.value: SUPPORTED_METADATA_SCHEMA_VERSIONS[schema]
        for schema in SUPPORTED_METADATA_SCHEMA_VERSIONS
    }
    item = {
        DataVersionTableField.DATA_VERSION.value: version,
        DataVersionTableField.CREATION_DATE.value:
            date.get_datetime_now(as_string=True),
        DataVersionTableField.PROJECT_CELL_COUNTS.value: cell_counts,
        DataVersionTableField.METADATA_SCHEMA_VERSIONS.value: schema_versions,
    }
    table = self._get_dynamo_table_resource_from_enum(
        DynamoTable.DATA_VERSION_TABLE)
    table.put_item(Item=item)
def run(self):
    """
    Execute the matrix conversion: parse the three manifests, convert to the
    requested format, upload the result to S3 and mark the request complete.

    Any exception is recorded on the request tracker and re-raised.
    """
    try:
        LOGGER.debug(
            f"Beginning matrix conversion run for {self.args.request_id}")
        self.expression_manifest = self._parse_manifest(
            self.args.expression_manifest_key)
        self.cell_manifest = self._parse_manifest(
            self.args.cell_metadata_manifest_key)
        self.gene_manifest = self._parse_manifest(
            self.args.gene_metadata_manifest_key)

        LOGGER.debug(f"Beginning conversion to {self.format}")
        # Dispatch to the per-format converter method, e.g. _to_loom.
        converter = getattr(self, f"_to_{self.format}")
        local_converted_path = converter()
        LOGGER.debug(f"Conversion to {self.format} completed")

        LOGGER.debug(f"Beginning upload to S3")
        self._upload_converted_matrix(local_converted_path, self.target_path)
        LOGGER.debug("Upload to S3 complete, job finished")
        os.remove(local_converted_path)

        self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
        elapsed = (date.get_datetime_now() -
                   date.to_datetime(self.request_tracker.creation_date))
        self.request_tracker.complete_request(duration=elapsed.total_seconds())
    except Exception as e:
        LOGGER.info(
            f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}"
        )
        self.request_tracker.log_error(str(e))
        raise e
def create_request_table_entry(
        self,
        request_id: str,
        fmt: str,
        metadata_fields: list = DEFAULT_FIELDS,
        feature: str = DEFAULT_FEATURE,
        genus_species: GenusSpecies = GenusSpecies.HUMAN):
    """
    Put a new item in the Request table responsible for tracking the inputs,
    task execution progress and errors of a Matrix Request.

    :param request_id: UUID identifying a matrix service request.
    :param fmt: User requested output file format of final expression matrix.
    :param metadata_fields: User requested metadata fields to include in the
        expression matrix.
    :param feature: User requested feature type of final expression matrix
        (gene|transcript).
    :param genus_species: Genus/species of the requested matrix
        (defaults to GenusSpecies.HUMAN).
    """
    # New requests are pinned to the deployment's current data version.
    data_version = \
        self.get_table_item(table=DynamoTable.DEPLOYMENT_TABLE,
                            key=os.environ['DEPLOYMENT_STAGE'])[DeploymentTableField.CURRENT_DATA_VERSION.value]
    self._get_dynamo_table_resource_from_enum(
        DynamoTable.REQUEST_TABLE).put_item(
            Item={
                RequestTableField.REQUEST_ID.value: request_id,
                RequestTableField.REQUEST_HASH.value: "N/A",
                RequestTableField.DATA_VERSION.value: data_version,
                RequestTableField.CREATION_DATE.value:
                    date.get_datetime_now(as_string=True),
                RequestTableField.GENUS_SPECIES.value: genus_species.value,
                RequestTableField.FORMAT.value: fmt,
                RequestTableField.METADATA_FIELDS.value: metadata_fields,
                RequestTableField.FEATURE.value: feature,
                # -1 marks the bundle count as not yet known.
                RequestTableField.NUM_BUNDLES.value: -1,
                RequestTableField.ROW_COUNT.value: 0,
                RequestTableField.EXPECTED_DRIVER_EXECUTIONS.value: 1,
                RequestTableField.COMPLETED_DRIVER_EXECUTIONS.value: 0,
                RequestTableField.EXPECTED_QUERY_EXECUTIONS.value: 3,
                RequestTableField.COMPLETED_QUERY_EXECUTIONS.value: 0,
                RequestTableField.EXPECTED_CONVERTER_EXECUTIONS.value: 1,
                RequestTableField.COMPLETED_CONVERTER_EXECUTIONS.value: 0,
                RequestTableField.BATCH_JOB_ID.value: "N/A",
                # NOTE(review): 0 (falsy int) appears to be the "no error"
                # sentinel here, but tests elsewhere mock this field as ""
                # — confirm which sentinel readers actually expect.
                RequestTableField.ERROR_MESSAGE.value: 0
            })
def _log_error(entity: str, exception: Exception, trace: str,
               extractor: DSSExtractor):
    """
    Logs an ETL error and exception stack trace to a file.

    Error messages and exceptions are written to 'errors.txt'
    A list of failed entities are written to 'failed_transforms.txt'

    :param entity: A MetadataToPsvTransformer, or a bundle FQID for
        CellExpressionTransformer errors
    :param exception: Thrown exception
    :param trace: Exception stack trace
    :param extractor: DSSExtractor
    """
    # Bug fix: the original call passed `exception` as a %-style formatting
    # argument to a message with no placeholder, which makes the logging
    # module report a formatting error instead of the record. Use lazy
    # %-args so both values are rendered.
    logger.error("Failed to transform %s: %s", entity, exception)
    timestamp = date.get_datetime_now(as_string=True)
    log_dir = os.path.join(extractor.sd, MetadataToPsvTransformer.LOG_DIRNAME)
    # Append the full error + stack trace for debugging.
    with open(os.path.join(log_dir, 'errors.txt'), 'a+') as fh:
        fh.write(
            f"[{timestamp}] {entity} failed with exception: {exception}\n{trace}\n"
        )
    # Append just the entity name so failed transforms can be retried.
    with open(os.path.join(log_dir, 'failed_transforms.txt'), 'a+') as fh:
        fh.write(f"{entity}\n")
def timeout(self) -> bool:
    """
    Whether the request has exceeded its 36-hour processing window.

    Logs an error on the request when it has timed out.

    :return: bool
    """
    timeout = date.to_datetime(
        self.creation_date) < date.get_datetime_now() - timedelta(hours=36)
    if timeout:
        # Bug fix: the message previously said "12 hours" although the check
        # above uses 36 hours, and the two adjacent string literals joined
        # without a separating space ("hours.Please").
        self.log_error(
            "This request has timed out after 36 hours. "
            "Please try again by resubmitting the POST request.")
    return timeout
def test_get_matrix_processing__post_driver(self, mock_is_request_complete,
                                            mock_get_table_item,
                                            mock_initialized):
    """An initialized but incomplete request reports IN_PROGRESS with 200 OK."""
    request_id = str(uuid.uuid4())
    mock_initialized.return_value = True
    mock_is_request_complete.return_value = False
    table_entry = {
        RequestTableField.ERROR_MESSAGE.value: "",
        RequestTableField.FORMAT.value: "test_format",
        RequestTableField.CREATION_DATE.value:
            get_datetime_now(as_string=True),
    }
    mock_get_table_item.return_value = table_entry

    response = get_matrix(request_id)

    # response is (body, status_code)
    self.assertEqual(response[1], requests.codes.ok)
    self.assertEqual(response[0]['status'],
                     MatrixRequestStatus.IN_PROGRESS.value)
def test_is_expired(self, mock_exists, mock_creation_date, mock_log_error):
    """is_expired requires both a missing matrix and a >30-day-old request."""
    def backdate(delta):
        # Mock the request's creation date as `delta` in the past.
        mock_creation_date.return_value = date.to_string(
            date.get_datetime_now() - delta)

    with self.subTest("Expired"):
        mock_exists.return_value = False
        backdate(timedelta(days=30, minutes=1))
        self.assertTrue(self.request_tracker.is_expired)
        mock_log_error.assert_called_once()
        mock_log_error.reset_mock()

    with self.subTest(
            "Not expired. Matrix DNE but not past expiration date"):
        mock_exists.return_value = False
        backdate(timedelta(days=29))
        self.assertFalse(self.request_tracker.is_expired)
        mock_log_error.assert_not_called()

    with self.subTest("Not expired. Matrix exists"):
        mock_exists.return_value = True
        self.assertFalse(self.request_tracker.is_expired)
        mock_log_error.assert_not_called()
def is_expired(self):
    """
    Whether or not the request has expired and the matrix in S3 has been
    deleted.

    :return: bool
    """
    results_bucket = S3Handler(os.environ['MATRIX_RESULTS_BUCKET'])
    # A request expires 30 days after its creation date.
    expiration_cutoff = date.get_datetime_now() - timedelta(days=30)
    is_past_expiration = (date.to_datetime(self.creation_date) <
                          expiration_cutoff)
    matrix_deleted = not results_bucket.exists(self.s3_results_key)
    is_expired = matrix_deleted and is_past_expiration
    if is_expired:
        self.log_error(
            "This request has expired after 30 days and is no longer available for download. "
            "A new matrix can be generated by resubmitting the POST request to /v1/matrix."
        )
    return is_expired
def run(self):
    """
    Execute the matrix conversion: load the three query results, convert to
    the requested format (short-circuiting to an empty file when there are
    no cells), upload the result to S3 and mark the request complete.

    Any exception is recorded on the request tracker and re-raised.
    """
    try:
        LOGGER.debug(
            f"Beginning matrix conversion run for {self.args.request_id}")
        self.query_results = {
            QueryType.CELL:
                CellQueryResultsReader(self.args.cell_metadata_manifest_key),
            QueryType.EXPRESSION:
                ExpressionQueryResultsReader(
                    self.args.expression_manifest_key),
            QueryType.FEATURE:
                FeatureQueryResultsReader(
                    self.args.gene_metadata_manifest_key),
        }

        if self.query_results[QueryType.CELL].is_empty:
            # No cells: produce an empty output file instead of converting.
            LOGGER.debug(
                f"Short-circuiting conversion because there are no cells.")
            pathlib.Path(self.local_output_filename).touch()
            local_converted_path = self.local_output_filename
        else:
            LOGGER.debug(f"Beginning conversion to {self.format}")
            # Dispatch to the per-format converter method, e.g. _to_loom.
            local_converted_path = getattr(self, f"_to_{self.format}")()
            LOGGER.debug(f"Conversion to {self.format} completed")

        LOGGER.debug(f"Beginning upload to S3")
        self._upload_converted_matrix(local_converted_path, self.target_path)
        LOGGER.debug("Upload to S3 complete, job finished")
        os.remove(local_converted_path)

        self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
        elapsed = (date.get_datetime_now() -
                   date.to_datetime(self.request_tracker.creation_date))
        self.request_tracker.complete_request(duration=elapsed.total_seconds())
    except Exception as e:
        LOGGER.info(
            f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}"
        )
        self.request_tracker.log_error(str(e))
        raise e
def test_get_matrix_no_cells(self, mock_is_request_complete,
                             mock_get_table_item, mock_batch_job_status,
                             mock_s3_size):
    """A completed request with a zero-byte matrix reports an empty URL."""
    request_id = str(uuid.uuid4())
    table_entry = {
        RequestTableField.DATA_VERSION.value: 0,
        RequestTableField.ERROR_MESSAGE.value: "",
        RequestTableField.FORMAT.value: "test_format",
        RequestTableField.GENUS_SPECIES.value: GenusSpecies.HUMAN.value,
        RequestTableField.CREATION_DATE.value:
            get_datetime_now(as_string=True),
    }
    mock_get_table_item.return_value = table_entry
    mock_batch_job_status.return_value = "SUCCEEDED"
    mock_is_request_complete.return_value = True
    mock_s3_size.return_value = 0

    response = core.get_matrix(request_id)

    # response is (body, status_code)
    self.assertEqual(response[1], requests.codes.ok)
    self.assertEqual(response[0]['status'],
                     MatrixRequestStatus.COMPLETE.value)
    self.assertEqual(response[0]['matrix_url'], "")
def test_get_datetime_now(self):
    """get_datetime_now should be within one second of the fixture's clock."""
    elapsed = (date.get_datetime_now() - self.now).total_seconds()
    self.assertTrue(elapsed <= 1)
def setup(self):
    # Capture a single string timestamp for this test run and announce it.
    now_string = date.get_datetime_now(as_string=True)
    self.timestamp = now_string
    print(f"Running test with timestamp {self.timestamp}")