Example #1
    def test_add_file_to_dataset_kafka(self, mocker, mock_rabbit_adaptor):
        processor = LookupResultProcessor(mock_rabbit_adaptor, None,
                                          "http://cern.analysis.ch:5000/")
        dataset_file = DatasetFile(request_id="BR549",
                                   file_path="/foo/bar.root",
                                   adler32='12345',
                                   file_size=1024,
                                   file_events=500)

        dataset_file.id = 42
        dataset_file.save_to_db = mocker.Mock()
        processor.add_file_to_dataset(self._generate_transform_request(),
                                      dataset_file)

        dataset_file.save_to_db.assert_called()
        mock_rabbit_adaptor.basic_publish.assert_called_with(
            exchange='transformation_requests',
            routing_key='BR549',
            body=json.dumps({
                "request-id": 'BR549',
                "file-id": 42,
                "columns": 'electron.eta(), muon.pt()',
                "file-path": "/foo/bar.root",
                "tree-name": "Events",
                "service-endpoint":
                "http://cern.analysis.ch:5000/servicex/transformation/BR549",
                'result-destination': 'kafka',
                'kafka-broker': 'http://ssl-hep.org.kafka:12345'
            }))
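
The test relies on a _generate_transform_request helper that is not shown in this listing. A minimal sketch of what it plausibly returns, reconstructed from the values the test asserts on (every field value below comes straight from the asserted publish body; anything else the real helper sets is unknown):

    def _generate_transform_request(self):
        # Hypothetical fixture: field values are taken from the asserted
        # publish body above; the real helper may populate more fields.
        transform_request = TransformRequest()
        transform_request.request_id = 'BR549'
        transform_request.columns = 'electron.eta(), muon.pt()'
        transform_request.tree_name = 'Events'
        transform_request.result_destination = 'kafka'
        transform_request.kafka_broker = 'http://ssl-hep.org.kafka:12345'
        return transform_request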
Example #2
    def put(self, request_id):
        try:
            from servicex.models import db
            add_file_request = request.get_json()
            submitted_request = TransformRequest.return_request(request_id)

            db_record = DatasetFile(request_id=request_id,
                                    file_path=add_file_request['file_path'],
                                    adler32=add_file_request['adler32'],
                                    file_events=add_file_request['file_events'],
                                    file_size=add_file_request['file_size'])

            self.lookup_result_processor.add_file_to_dataset(submitted_request, db_record)

            if self.elasticsearch_adaptor:
                self.elasticsearch_adaptor.create_update_path(
                    db_record.get_path_id(),
                    self._generate_file_status_record(
                        db_record, "located")
                )
            db.session.commit()

            return {
                "request-id": str(request_id),
                "file-id": db_record.id
            }

        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
            print(exc_value)
            return {'message': 'Something went wrong: ' + str(exc_value)}, 500
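
The handler reads exactly four keys from the posted JSON. A hypothetical client-side call might look like this (the '/files' path segment is an assumption modeled on the transformation routes used elsewhere in this section):

import requests

# Illustrative only: the endpoint path is assumed, and the host matches
# the advertised endpoint used in the tests above.
resp = requests.put(
    'http://cern.analysis.ch:5000/servicex/transformation/BR549/files',
    json={
        'file_path': '/foo/bar.root',
        'adler32': '12345',
        'file_events': 500,
        'file_size': 1024
    })
print(resp.json())  # e.g. {'request-id': 'BR549', 'file-id': 42}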
    def test_get_errors(self, mocker, mock_rabbit_adaptor):
        import servicex

        mock_transform_request_read = mocker.patch.object(
            servicex.models.TransformRequest,
            'return_request',
            return_value=self._generate_transform_request())

        file_error_result = [(DatasetFile(file_path="/foo.bar/baz.root",
                                          file_events=42),
                              FileStatus(pod_name='openthepodhal',
                                         info="sorry I can't"))]
        mock_transform_errors = mocker.patch.object(
            servicex.models.FileStatus,
            'failures_for_request',
            return_value=file_error_result)

        client = self._test_client(rabbit_adaptor=mock_rabbit_adaptor)

        response = client.get('/servicex/transformation/1234/errors')
        assert response.status_code == 200
        assert response.json == {
            'errors': [{
                'pod-name': 'openthepodhal',
                'file': '/foo.bar/baz.root',
                'events': 42,
                'info': "sorry I can't"
            }]
        }

        mock_transform_request_read.assert_called_with("1234")
        mock_transform_errors.assert_called_with("1234")
    def _generate_dataset_file(self):
        mock_dataset_file = DatasetFile()
        mock_dataset_file.adler32 = '123-455'
        mock_dataset_file.file_size = 0
        mock_dataset_file.file_events = 0
        mock_dataset_file.file_path = '/foo/bar.root'
        mock_dataset_file.request_id = 'BR549'
        return mock_dataset_file
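
Both PUT handlers hand the output of _generate_file_status_record to Elasticsearch, but the record itself never appears in this listing. A hypothetical reconstruction, with field names that are pure assumption:

    def _generate_file_status_record(self, dataset_file, status):
        # Sketch only: the real record layout is not shown in this section.
        from datetime import datetime, timezone
        return {
            'request_id': dataset_file.request_id,
            'file_path': dataset_file.file_path,
            'status': status,
            'timestamp': datetime.now(tz=timezone.utc).isoformat()
        }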
    def put(self, request_id):
        info = request.get_json()
        submitted_request = TransformRequest.return_request(request_id)
        dataset_file = DatasetFile.get_by_id(info['file-id'])

        rec = TransformationResult(
            did=submitted_request.did,
            file_id=dataset_file.id,
            request_id=request_id,
            file_path=info['file-path'],
            transform_status=info['status'],
            transform_time=info['total-time'],
            total_bytes=info['total-bytes'],
            total_events=info['total-events'],
            avg_rate=info['avg-rate'],
            messages=info['num-messages']
        )
        rec.save_to_db()

        if self.elasticsearch_adapter:
            self.elasticsearch_adapter.create_update_path(
                dataset_file.get_path_id(),
                self._generate_file_status_record(dataset_file, info['status']))

            self.elasticsearch_adapter.create_update_request(
                request_id,
                self._generate_transformation_record(submitted_request, 'transforming'))

        files_remaining = TransformRequest.files_remaining(request_id)
        if files_remaining is not None and files_remaining <= 0:
            namespace = current_app.config['TRANSFORMER_NAMESPACE']
            print("Job is all done... shutting down transformers")
            self.transformer_manager.shutdown_transformer_job(request_id, namespace)
            submitted_request.status = "Complete"
            submitted_request.save_to_db()

            if self.elasticsearch_adapter:
                self.elasticsearch_adapter.create_update_request(
                    request_id,
                    self._generate_transformation_record(submitted_request, 'complete'))

        print(info)
        db.session.commit()

        return "Ok"
    def post(self):
        try:
            transformation_request = parser.parse_args()
            print("object store ", self.object_store)

            request_id = str(uuid.uuid4())
            time = datetime.now(tz=timezone.utc)

            requested_did = transformation_request.get('did')
            requested_file_list = transformation_request.get('file-list')

            # requested_did xor requested_file_list
            if bool(requested_did) == bool(requested_file_list):
                raise BadRequest("Must provide did or file-list but not both")

            if self.object_store and \
                    transformation_request['result-destination'] == 'object-store':
                self.object_store.create_bucket(request_id)
                # What happens if result-destination is object-store but
                # object_store is None?

            if transformation_request['result-destination'] == 'kafka':
                broker = transformation_request['kafka']['broker']
            else:
                broker = None

            request_rec = TransformRequest(
                did=requested_did or "File List Provided in Request",
                submit_time=time,
                columns=transformation_request['columns'],
                selection=transformation_request['selection'],
                tree_name=transformation_request['tree-name'],
                request_id=str(request_id),
                image=transformation_request['image'],
                chunk_size=transformation_request['chunk-size'],
                result_destination=transformation_request[
                    'result-destination'],
                result_format=transformation_request['result-format'],
                kafka_broker=broker,
                workers=transformation_request['workers'],
                workflow_name=_workflow_name(transformation_request))

            # For the selection_codegen workflow, the first thing to do is make
            # sure the requested selection is valid and generate the C++ files
            if request_rec.workflow_name == 'selection_codegen':
                namespace = current_app.config['TRANSFORMER_NAMESPACE']
                request_rec.generated_code_cm = \
                    self.code_gen_service.generate_code_for_selection(request_rec, namespace)

            # Create queue for transformers to read from
            self.rabbitmq_adaptor.setup_queue(request_id)

            self.rabbitmq_adaptor.bind_queue_to_exchange(
                exchange="transformation_requests", queue=request_id)

            # Also setup an error queue for dead letters generated by transformer
            self.rabbitmq_adaptor.setup_queue(request_id + "_errors")

            self.rabbitmq_adaptor.bind_queue_to_exchange(
                exchange="transformation_failures",
                queue=request_id + "_errors")

            request_rec.save_to_db()

            if requested_did:
                did_request = {
                    "request_id": request_rec.request_id,
                    "did": request_rec.did,
                    "service-endpoint": self._generate_advertised_endpoint(
                        "servicex/transformation/" + request_rec.request_id)
                }

                self.rabbitmq_adaptor.basic_publish(
                    exchange='',
                    routing_key='did_requests',
                    body=json.dumps(did_request))
            else:
                # Request a preflight check on the first file
                self.lookup_result_processor.publish_preflight_request(
                    request_rec, requested_file_list[0])

                for file_path in requested_file_list:
                    file_record = DatasetFile(request_id=request_id,
                                              file_path=file_path,
                                              adler32="xxx",
                                              file_events=0,
                                              file_size=0)
                    self.lookup_result_processor.add_file_to_dataset(
                        request_rec, file_record)

                self.lookup_result_processor.report_fileset_complete(
                    request_rec, num_files=len(requested_file_list))

            db.session.commit()

            if self.elasticsearch_adapter:
                self.elasticsearch_adapter.create_update_request(
                    request_id,
                    self._generate_transformation_record(
                        request_rec, "locating DID"))

            return {"request_id": str(request_id)}
        except BadRequest as bad_request:
            return {
                'message':
                f'The json request was malformed: {str(bad_request)}'
            }, 400
        except ValueError as eek:
            return {
                'message': f'Failed to submit transform request: {str(eek)}'
            }, 400
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=20, file=sys.stdout)
            print(exc_value)
            return {'message': 'Something went wrong'}, 500
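
Pulling the parsed fields together, a submission to this POST endpoint that targets Kafka could look like the following. Only keys the handler actually reads are included; every value (and the image name) is illustrative, and exactly one of 'did' or 'file-list' may be set:

# Example request body for the POST handler above; all values are
# placeholders. Supply 'did' or 'file-list', never both.
submission = {
    'did': 'my-dataset-did',
    'columns': 'electron.eta(), muon.pt()',
    'selection': None,
    'tree-name': 'Events',
    'image': 'example/transformer:latest',
    'chunk-size': 1000,
    'workers': 4,
    'result-destination': 'kafka',
    'result-format': 'arrow',
    'kafka': {'broker': 'http://ssl-hep.org.kafka:12345'}
}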