def test_add_file_to_dataset_kafka(self, mocker, mock_rabbit_adaptor):
    processor = LookupResultProcessor(mock_rabbit_adaptor, None,
                                      "http://cern.analysis.ch:5000/")
    dataset_file = DatasetFile(request_id="BR549",
                               file_path="/foo/bar.root",
                               adler32='12345',
                               file_size=1024,
                               file_events=500)
    dataset_file.id = 42
    dataset_file.save_to_db = mocker.Mock()

    processor.add_file_to_dataset(self._generate_transform_request(),
                                  dataset_file)

    dataset_file.save_to_db.assert_called()
    mock_rabbit_adaptor.basic_publish.assert_called_with(
        exchange='transformation_requests',
        routing_key='BR549',
        body=json.dumps({
            "request-id": 'BR549',
            "file-id": 42,
            "columns": 'electron.eta(), muon.pt()',
            "file-path": "/foo/bar.root",
            "tree-name": "Events",
            "service-endpoint":
                "http://cern.analysis.ch:5000/servicex/transformation/BR549",
            'result-destination': 'kafka',
            'kafka-broker': 'http://ssl-hep.org.kafka:12345'
        }))
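# The test above leans on a _generate_transform_request fixture defined
# elsewhere in the suite. A minimal sketch of what it plausibly returns, with
# field values inferred from the assertions above (not the canonical fixture):
def _generate_transform_request(self):
    transform_request = TransformRequest()
    transform_request.request_id = 'BR549'
    transform_request.columns = 'electron.eta(), muon.pt()'
    transform_request.tree_name = 'Events'
    transform_request.result_destination = 'kafka'
    transform_request.kafka_broker = 'http://ssl-hep.org.kafka:12345'
    return transform_request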
def put(self, request_id):
    try:
        from servicex.models import db
        add_file_request = request.get_json()
        submitted_request = TransformRequest.return_request(request_id)
        db_record = DatasetFile(request_id=request_id,
                                file_path=add_file_request['file_path'],
                                adler32=add_file_request['adler32'],
                                file_events=add_file_request['file_events'],
                                file_size=add_file_request['file_size'])
        self.lookup_result_processor.add_file_to_dataset(submitted_request,
                                                         db_record)

        if self.elasticsearch_adaptor:
            self.elasticsearch_adaptor.create_update_path(
                db_record.get_path_id(),
                self._generate_file_status_record(db_record, "located"))

        db.session.commit()
        return {
            "request-id": str(request_id),
            "file-id": db_record.id
        }
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(exc_value)
        return {'message': 'Something went wrong: ' + str(exc_value)}, 500
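# Hedged usage sketch: how a DID-finder client might exercise the handler
# above. The host name and the "/files" route are assumptions for illustration.
import requests

resp = requests.put(
    "http://servicex.example.org:5000/servicex/transformation/BR549/files",
    json={
        "file_path": "/foo/bar.root",
        "adler32": "12345",
        "file_events": 500,
        "file_size": 1024
    })
print(resp.json())  # e.g. {"request-id": "BR549", "file-id": 42}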
def test_get_errors(self, mocker, mock_rabbit_adaptor):
    import servicex

    mock_transform_request_read = mocker.patch.object(
        servicex.models.TransformRequest, 'return_request',
        return_value=self._generate_transform_request())

    file_error_result = [
        (DatasetFile(file_path="/foo.bar/baz.root", file_events=42),
         FileStatus(pod_name='openthepodhal', info="sorry I can't"))
    ]
    mock_transform_errors = mocker.patch.object(
        servicex.models.FileStatus, 'failures_for_request',
        return_value=file_error_result)

    client = self._test_client(rabbit_adaptor=mock_rabbit_adaptor)
    response = client.get('/servicex/transformation/1234/errors')
    assert response.status_code == 200
    assert response.json == {
        'errors': [{
            'pod-name': 'openthepodhal',
            'file': '/foo.bar/baz.root',
            'events': 42,
            'info': "sorry I can't"
        }]
    }
    mock_transform_request_read.assert_called_with("1234")
    mock_transform_errors.assert_called_with("1234")
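# Hedged usage sketch of the errors endpoint the test above exercises; the
# host name is illustrative.
import requests

errors = requests.get(
    "http://servicex.example.org:5000/servicex/transformation/1234/errors"
).json()
for error in errors['errors']:
    print(error['file'], error['pod-name'], error['info'])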
def _generate_dataset_file(self):
    mock_dataset_file = DatasetFile()
    mock_dataset_file.adler32 = '123-455'
    mock_dataset_file.file_size = 0
    mock_dataset_file.file_events = 0
    mock_dataset_file.file_path = '/foo/bar.root'
    mock_dataset_file.request_id = 'BR549'
    return mock_dataset_file
def put(self, request_id):
    info = request.get_json()
    submitted_request = TransformRequest.return_request(request_id)
    dataset_file = DatasetFile.get_by_id(info['file-id'])

    rec = TransformationResult(
        did=submitted_request.did,
        file_id=dataset_file.id,
        request_id=request_id,
        file_path=info['file-path'],
        transform_status=info['status'],
        transform_time=info['total-time'],
        total_bytes=info['total-bytes'],
        total_events=info['total-events'],
        avg_rate=info['avg-rate'],
        messages=info['num-messages']
    )
    rec.save_to_db()

    if self.elasticsearch_adapter:
        self.elasticsearch_adapter.create_update_path(
            dataset_file.get_path_id(),
            self._generate_file_status_record(dataset_file, info['status']))
        self.elasticsearch_adapter.create_update_request(
            request_id,
            self._generate_transformation_record(submitted_request,
                                                 'transforming'))

    files_remaining = TransformRequest.files_remaining(request_id)
    if files_remaining is not None and files_remaining <= 0:
        namespace = current_app.config['TRANSFORMER_NAMESPACE']
        print("Job is all done... shutting down transformers")
        self.transformer_manager.shutdown_transformer_job(request_id, namespace)
        submitted_request.status = "Complete"
        submitted_request.save_to_db()
        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_request(
                request_id,
                self._generate_transformation_record(submitted_request,
                                                     'complete'))

    print(info)
    db.session.commit()
    return "Ok"
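# Hedged sketch of the JSON body a transformer might PUT to report one file's
# outcome. The keys mirror those read in the handler above; the endpoint path
# and host name are assumptions for illustration.
import requests

status_report = {
    "file-id": 42,
    "file-path": "/foo/bar.root",
    "status": "success",
    "total-time": 12.5,
    "total-bytes": 1024,
    "total-events": 500,
    "avg-rate": 40.0,
    "num-messages": 10
}
requests.put(
    "http://servicex.example.org:5000/servicex/transformation/BR549/status",
    json=status_report)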
def post(self):
    try:
        transformation_request = parser.parse_args()
        print("object store ", self.object_store)
        request_id = str(uuid.uuid4())
        time = datetime.now(tz=timezone.utc)

        requested_did = transformation_request['did'] \
            if 'did' in transformation_request else None
        requested_file_list = transformation_request['file-list'] \
            if 'file-list' in transformation_request else None

        # Exactly one of requested_did or requested_file_list must be provided
        if bool(requested_did) == bool(requested_file_list):
            raise BadRequest("Must provide did or file-list but not both")

        if self.object_store and \
                transformation_request['result-destination'] == 'object-store':
            self.object_store.create_bucket(request_id)

        # What happens if result-destination is object-store but
        # object_store is None?
        if transformation_request['result-destination'] == 'kafka':
            broker = transformation_request['kafka']['broker']
        else:
            broker = None

        request_rec = TransformRequest(
            did=requested_did if requested_did
                else "File List Provided in Request",
            submit_time=time,
            columns=transformation_request['columns'],
            selection=transformation_request['selection'],
            tree_name=transformation_request['tree-name'],
            request_id=str(request_id),
            image=transformation_request['image'],
            chunk_size=transformation_request['chunk-size'],
            result_destination=transformation_request['result-destination'],
            result_format=transformation_request['result-format'],
            kafka_broker=broker,
            workers=transformation_request['workers'],
            workflow_name=_workflow_name(transformation_request))

        # If we are doing the selection_codegen workflow, the first thing to do
        # is make sure the requested selection is valid and generate the
        # C++ files
        if request_rec.workflow_name == 'selection_codegen':
            namespace = current_app.config['TRANSFORMER_NAMESPACE']
            request_rec.generated_code_cm = \
                self.code_gen_service.generate_code_for_selection(request_rec,
                                                                  namespace)

        # Create queue for transformers to read from
        self.rabbitmq_adaptor.setup_queue(request_id)
        self.rabbitmq_adaptor.bind_queue_to_exchange(
            exchange="transformation_requests",
            queue=request_id)

        # Also set up an error queue for dead letters generated by the
        # transformer
        self.rabbitmq_adaptor.setup_queue(request_id + "_errors")
        self.rabbitmq_adaptor.bind_queue_to_exchange(
            exchange="transformation_failures",
            queue=request_id + "_errors")

        request_rec.save_to_db()

        if requested_did:
            did_request = {
                "request_id": request_rec.request_id,
                "did": request_rec.did,
                "service-endpoint": self._generate_advertised_endpoint(
                    "servicex/transformation/" + request_rec.request_id)
            }
            self.rabbitmq_adaptor.basic_publish(exchange='',
                                                routing_key='did_requests',
                                                body=json.dumps(did_request))
        else:
            # Request a preflight check on the first file
            self.lookup_result_processor.publish_preflight_request(
                request_rec, requested_file_list[0])

            for file_path in requested_file_list:
                file_record = DatasetFile(request_id=request_id,
                                          file_path=file_path,
                                          adler32="xxx",
                                          file_events=0,
                                          file_size=0)
                self.lookup_result_processor.add_file_to_dataset(request_rec,
                                                                 file_record)

            self.lookup_result_processor.report_fileset_complete(
                request_rec, num_files=len(requested_file_list))

        db.session.commit()

        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_request(
                request_id,
                self._generate_transformation_record(request_rec,
                                                     "locating DID"))

        return {"request_id": str(request_id)}

    except BadRequest as bad_request:
        return {
            'message': f'The json request was malformed: {str(bad_request)}'
        }, 400
    except ValueError as eek:
        return {
            'message': f'Failed to submit transform request: {str(eek)}'
        }, 400
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
        print(exc_value)
        return {'message': 'Something went wrong'}, 500
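# Hedged sketch of a file-list submission the handler above would accept. The
# host name and field values are illustrative; the keys follow those read by
# the handler.
import requests

submission = {
    "file-list": ["/foo/bar.root"],
    "columns": "electron.eta(), muon.pt()",
    "selection": None,
    "tree-name": "Events",
    "image": "sslhep/servicex-transformer:latest",
    "chunk-size": 1000,
    "result-destination": "kafka",
    "result-format": "arrow",
    "kafka": {"broker": "http://ssl-hep.org.kafka:12345"},
    "workers": 1
}
resp = requests.post(
    "http://servicex.example.org:5000/servicex/transformation",
    json=submission)
print(resp.json())  # e.g. {"request_id": "<uuid>"}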