def __init__(
    self,
    view: Optional[str] = None,
    name: Optional[str] = None,
    mode: str = MODE_ERROR,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    limit: int = -1,
):
    assert mode in FrameworkBaseExporter.MODE_CHOICES
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.mode: Param[str] = Param(self, "mode", "")
    self._setDefault(mode=mode)

    self.limit: Param[int] = Param(self, "limit", "")
    self._setDefault(limit=None)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    enable: Union[bool, Callable[[DataFrame], bool]],
    stages: Union[List[Transformer], Callable[[], List[Transformer]]],
    else_stages: Optional[
        Union[List[Transformer], Callable[[], List[Transformer]]]
    ] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    """
    If the enable flag is true, runs stages; otherwise runs else_stages.

    :param enable: a boolean or a function that takes a DataFrame and returns a boolean
    :param stages: list of transformers or a function that returns a list of transformers
    :param else_stages: list of transformers or a function that returns a list of transformers
    """
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    self.enable: Union[bool, Callable[[DataFrame], bool]] = enable
    self.stages: Union[List[Transformer], Callable[[], List[Transformer]]] = stages
    self.else_stages: Optional[
        Union[List[Transformer], Callable[[], List[Transformer]]]
    ] = else_stages

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
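# Hypothetical usage sketch for the conditional transformer above. The class
# name FrameworkIfElseTransformer is an assumption made for illustration; the
# keyword arguments come from the signature shown, and transformer_a /
# transformer_b stand in for any Transformer instances.
if_else = FrameworkIfElseTransformer(
    enable=lambda df: df.count() > 0,  # evaluated against the incoming DataFrame at transform time
    stages=[transformer_a],            # run when enable is (or returns) True
    else_stages=[transformer_b],       # run otherwise
)
result_df = if_else.transform(input_df)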
def __init__(
    self,
    view: Optional[str] = None,
    drop_columns: Optional[List[str]] = None,
    keep_columns: Optional[List[str]] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    verify_count_remains_same: bool = False,
) -> None:
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    if not view:
        raise ValueError("view is None or empty")

    self.view: Param = Param(self, "view", "")
    self._setDefault(view=None)  # type: ignore

    self.drop_columns: Param = Param(self, "drop_columns", "")
    self._setDefault(drop_columns=None)  # type: ignore

    self.keep_columns: Param = Param(self, "keep_columns", "")
    self._setDefault(keep_columns=None)  # type: ignore

    self.verify_count_remains_same: Param = Param(
        self, "verify_count_remains_same", ""
    )
    self._setDefault(verify_count_remains_same=None)  # type: ignore

    kwargs = self._input_kwargs  # type: ignore
    self.setParams(**kwargs)
def __init__(
    self,
    server_url: str,
    relative_url: str,
    method: str,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    # add a param
    self.server_url: Param[str] = Param(self, "server_url", "")
    self._setDefault(server_url=server_url)

    self.relative_url: Param[str] = Param(self, "relative_url", "")
    self._setDefault(relative_url=relative_url)

    self.method: Param[str] = Param(self, "method", "")
    self._setDefault(method=method)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    view: str,
    file_path: Union[str, List[str], Path],
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    merge_schema: bool = False,
    limit: int = -1,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.file_path: Param[str] = Param(self, "file_path", "")
    self._setDefault(file_path=None)

    self.merge_schema: Param[bool] = Param(self, "merge_schema", "")
    self._setDefault(merge_schema=None)

    self.limit: Param[int] = Param(self, "limit", "")
    self._setDefault(limit=None)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    # add your parameters here (be sure to add them to setParams below too)
    jdbc_url: str,
    query: str,
    driver: str,
    view: Optional[str] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )

    assert jdbc_url
    assert query
    assert driver

    self.logger = get_logger(__name__)

    # add a param
    self.jdbc_url: Param[str] = Param(self, "jdbc_url", "")
    self._setDefault(jdbc_url=jdbc_url)

    self.query: Param[str] = Param(self, "query", "")
    self._setDefault(query=query)

    self.driver: Param[str] = Param(self, "driver", "")
    self._setDefault(driver=driver)

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    view: str,
    file_path: Union[str, List[str], Path],
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    merge_schema: bool = False,
    limit: int = -1,
):
    super(FrameworkParquetLoader, self).__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    self.view: Param = Param(self, "view", "")
    self._setDefault(view=view)  # type: ignore

    self.file_path: Param = Param(self, "file_path", "")
    self._setDefault(file_path=None)  # type: ignore

    self.merge_schema: Param = Param(self, "merge_schema", "")
    self._setDefault(merge_schema=None)  # type: ignore

    self.limit: Param = Param(self, "limit", "")
    self._setDefault(limit=None)  # type: ignore

    kwargs = self._input_kwargs  # type: ignore
    self.setParams(**kwargs)
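# Usage sketch for FrameworkParquetLoader (the one class these excerpts name
# explicitly, via the super() call above); the import path is not shown here.
# The view parameter suggests the loaded data is registered under that view
# name, so input_df can be any DataFrame and is simply threaded through.
input_df = spark_session.createDataFrame([], "id string")  # placeholder frame
parquet_loader = FrameworkParquetLoader(
    view="encounters",
    file_path="data/encounters.parquet",  # str, List[str], or Path
    merge_schema=True,                    # reconcile schemas across part files
    limit=100,                            # -1 (the default) keeps all rows
)
parquet_loader.transform(input_df)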
def __init__(
    self,
    file_path: Union[str, Path],
    view: Optional[str] = None,
    name: Optional[str] = None,
    mode: str = FrameworkBaseExporter.MODE_ERROR,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    limit: int = -1,
) -> None:
    super().__init__(
        view=view,
        name=name,
        mode=mode,
        parameters=parameters,
        progress_logger=progress_logger,
        limit=limit,
    )

    assert isinstance(file_path, (Path, str))
    assert file_path

    self.logger = get_logger(__name__)

    self.file_path: Param[Union[str, Path]] = Param(self, "file_path", "")
    self._setDefault(file_path=None)
    self._set(file_path=file_path)
def __init__(
    self,
    # add your parameters here (be sure to add them to setParams below too)
    view: str,
    columns: List[str],
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    assert view
    assert columns
    assert isinstance(columns, list)

    # add a param
    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.columns: Param[List[str]] = Param(self, "columns", "")
    self._setDefault(columns=columns)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    file_path: Union[str, Path],
    view: Optional[str] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    limit: int = -1,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )

    assert isinstance(file_path, (Path, str))
    assert file_path

    self.logger = get_logger(__name__)

    self.view: Param = Param(self, "view", "")
    self._setDefault(view=view)  # type: ignore

    self.file_path: Param = Param(self, "file_path", "")
    self._setDefault(file_path=None)  # type: ignore

    self.limit: Param = Param(self, "limit", "")
    self._setDefault(limit=None)  # type: ignore

    kwargs = self._input_kwargs  # type: ignore
    self.setParams(**kwargs)
def test_source_api_call(spark_session: SparkSession) -> None:
    test_path: Path = Path(__file__).parent.joinpath("./")
    test_name = "test_source_api_call"
    mock_server_url = "http://mock-server:1080"
    mock_client = MockServerFriendlyClient(mock_server_url)
    mock_client.clear(test_name)

    input_file = input_types.FileInput()
    request = input_types.ApiJsonResponse(response_data_folder="api_json_response")

    logger = get_logger(__name__)

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=test_path,
        test_name=test_name,
        test_validators=None,
        logger=logger,
        auto_find_helix_transformer=False,
        helix_transformers=[FeaturesComplexFeature],
        mock_client=mock_client,
        test_inputs=[input_file, request],
        temp_folder="output/temp",
    ).run_test2()

    with open(
        test_path.joinpath("api_json_response/getProviderApptTypes.json")
    ) as f:
        content = json.load(f)

    response: Response = requests.get(
        f"{mock_server_url}/{test_name}/getProviderApptTypes"
    )
    assert response.json() == content
def __init__(
    self,
    view: str,
    desired_partitions: Optional[int] = None,
    partition_by: Optional[List[str]] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    # add a param
    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.desired_partitions: Param[Optional[int]] = Param(
        self, "desired_partitions", ""
    )
    self._setDefault(desired_partitions=desired_partitions)

    self.partition_by: Param[Optional[List[str]]] = Param(self, "partition_by", "")
    self._setDefault(partition_by=partition_by)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    # add your parameters here (be sure to add them to setParams below too)
    column: str,
    include_only: List[Union[str, int, float]],
    view: str,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    assert column
    self.column: Param[str] = Param(self, "column", "")
    self._setDefault(column=column)

    assert view
    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    assert include_only and isinstance(include_only, list)
    self.include_only: Param[List[Union[str, int, float]]] = Param(
        self, "include_only", ""
    )
    self._setDefault(include_only=include_only)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def __init__(
    self,
    username: str,
    password: str,
    host: str,
    port: int,
    query: str,
    db_name: Optional[str] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )

    assert username
    assert password
    assert host
    assert port
    assert query

    self.logger = get_logger(__name__)

    self.username: Param[str] = Param(self, "username", "")
    # noinspection Mypy
    self._setDefault(username=username)

    self.password: Param[str] = Param(self, "password", "")
    # noinspection Mypy
    self._setDefault(password=password)

    self.host: Param[str] = Param(self, "host", "")
    # noinspection Mypy
    self._setDefault(host=host)

    self.port: Param[int] = Param(self, "port", "")
    # noinspection Mypy
    self._setDefault(port=port)

    self.query: Param[str] = Param(self, "query", "")
    # noinspection Mypy
    self._setDefault(query=query)

    self.db_name: Param[Optional[str]] = Param(self, "db_name", "")
    # noinspection Mypy
    self._setDefault(db_name=None)

    # noinspection Mypy
    self._set(**self._input_kwargs)

    super().setStandardParams(
        parameters=parameters, progress_logger=progress_logger
    )
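# Hypothetical usage sketch for the database transformer above. The class
# name DbQueryRunner is invented for illustration; the keyword arguments
# match the signature shown.
runner = DbQueryRunner(
    username="etl_user",
    password="change-me",  # prefer a secret store over literals in real pipelines
    host="db.example.com",
    port=3306,
    query="SELECT 1",
    db_name="warehouse",
)
runner.transform(input_df)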
def __init__(
    self,
    view: str,
    path_to_csv: Union[str, List[str], Path],
    delimiter: str = ",",
    limit: int = -1,
    has_header: bool = True,
    infer_schema: bool = False,
    cache_table: bool = True,
    schema: Optional[StructType] = None,
    create_file_path: bool = False,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
) -> None:
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger: Logger = get_logger(__name__)

    self.view: Param = Param(self, "view", "")
    self._setDefault(view=None)  # type: ignore

    self.path_to_csv: Param = Param(self, "path_to_csv", "")
    self._setDefault(path_to_csv=None)  # type: ignore

    self.delimiter: Param = Param(self, "delimiter", "")
    self._setDefault(delimiter=",")  # type: ignore

    self.schema: Param = Param(self, "schema", "")
    self._setDefault(schema=None)  # type: ignore

    self.cache_table: Param = Param(self, "cache_table", "")
    self._setDefault(cache_table=True)  # type: ignore

    self.has_header: Param = Param(self, "has_header", "")
    self._setDefault(has_header=True)  # type: ignore

    self.limit: Param = Param(self, "limit", "")
    self._setDefault(limit=-1)  # type: ignore

    self.infer_schema: Param = Param(self, "infer_schema", "")
    self._setDefault(infer_schema=False)  # type: ignore

    self.create_file_path: Param = Param(self, "create_file_path", "")
    self._setDefault(create_file_path=False)  # type: ignore

    if not path_to_csv:
        raise ValueError("path_to_csv is None or empty")

    self.logger.info(f"Received path_to_csv: {path_to_csv}")

    kwargs = self._input_kwargs  # type: ignore
    self.setParams(**kwargs)
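# Hypothetical usage sketch (class name FrameworkCsvLoader assumed from
# context). The view and cache_table parameters suggest the loaded data is
# registered as a Spark view after transform():
csv_loader = FrameworkCsvLoader(
    view="patients",
    path_to_csv="data/patients.csv",
    delimiter="|",       # default is ","
    has_header=True,
    infer_schema=True,   # or pass schema= an explicit StructType instead
    limit=-1,            # -1 loads every row
)
csv_loader.transform(input_df)  # input_df: any DataFrame; the loader reads from path_to_csv
spark_session.table("patients").show()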
def __init__(
    self,
    view: str,
    analysis_views_prefix: Optional[str] = None,
    output_folder: Optional[Union[Path, str]] = None,
    columns_to_analyze: Optional[List[str]] = None,
    columns_to_skip: Optional[List[str]] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    """
    For each column in the view, this transformer can either:

    1. Create a view that contains the unique values and the count of each
    2. Write a csv to output_folder that contains the unique values and the count of each

    :param view: view to load the data from
    :param analysis_views_prefix: (Optional) prefix to use when creating the analysis views
    :param output_folder: (Optional) folder in which to create the csvs
    :param columns_to_analyze: (Optional) limit analysis to these columns
    :param columns_to_skip: (Optional) don't include these columns in analysis
    """
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    assert view
    assert output_folder

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.analysis_views_prefix: Param[Optional[str]] = Param(
        self, "analysis_views_prefix", ""
    )
    self._setDefault(analysis_views_prefix=analysis_views_prefix)

    self.output_folder: Param[Optional[Union[Path, str]]] = Param(
        self, "output_folder", ""
    )
    self._setDefault(output_folder=output_folder)

    self.columns_to_analyze: Param[Optional[List[str]]] = Param(
        self, "columns_to_analyze", ""
    )
    self._setDefault(columns_to_analyze=columns_to_analyze)

    self.columns_to_skip: Param[Optional[List[str]]] = Param(
        self, "columns_to_skip", ""
    )
    self._setDefault(columns_to_skip=columns_to_skip)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
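# Hypothetical usage sketch (class name ColumnValueAnalyzer invented for
# illustration; the arguments come from the signature and docstring above).
# Note that despite the docstring calling output_folder optional, the
# constructor asserts it is set:
analyzer = ColumnValueAnalyzer(
    view="patients",
    analysis_views_prefix="analysis_",   # per-column views, e.g. analysis_gender
    output_folder="output/analysis",     # one csv of value counts per column
    columns_to_analyze=["gender", "state"],
    columns_to_skip=["ssn"],             # exclude sensitive columns
)
analyzer.transform(input_df)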
def test_practitioner(spark_session: SparkSession) -> None:
    test_path: Path = Path(__file__).parent.joinpath("./")

    # setup servers
    test_name = "practitioner"

    input_file = FileInput()

    logger = get_logger(__name__)

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=test_path,
        test_name=test_name,
        test_validators=[
            OutputFileValidator(related_inputs=input_file, sort_output_by=["id"])
        ],
        logger=logger,
        auto_find_helix_transformer=False,
        helix_transformers=[FeaturesDoctorFeaturePractitionerV1],
        test_inputs=[input_file],
        temp_folder="output/temp",
    ).run_test2()
def __init__(
    self,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super(FrameworkTransformer, self).__init__()
    self.logger = get_logger(__name__)

    self.name: Param = Param(self, "name", "")
    self._setDefault(name=None)  # type: ignore

    self.progress_logger: Param = Param(self, "progress_logger", "")
    self._setDefault(progress_logger=None)  # type: ignore

    self.parameters: Param = Param(self, "parameters", "")
    self._setDefault(parameters=None)  # type: ignore
def test_mock_fhir_graph_request(spark_session: SparkSession) -> None:
    """
    expect 2 $graph calls to be made to get Organization data
    """
    test_path: Path = Path(__file__).parent.joinpath("./")
    test_name = "test_mock_fhir_graph_request"
    mock_server_url = "http://mock-server:1080"
    mock_client = MockServerFriendlyClient(mock_server_url)
    mock_client.clear(f"/{test_name}/")

    fhir_calls = MockRequestResponseCalls(
        fhir_calls_folder=f"{test_path}/mock_request_responses/Organization",
        mock_url_prefix=f"{test_name}/4_0_0/Organization",
        url_suffix="$graph",
    )
    test_validators = MockCallValidator(related_inputs=fhir_calls)

    params = {
        "test_name": test_name,
        "mock_server_url": mock_server_url,
        "url_fhir_segments": "Organization/$graph",
        "files_path": [
            test_path.joinpath("mock_request_responses/Organization/1023011178.json"),
            test_path.joinpath("mock_request_responses/Organization/1841293990.json"),
        ],
    }

    logger = get_logger(__name__)

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=test_path,
        test_name=test_name,
        logger=logger,
        auto_find_helix_transformer=False,
        helix_transformers=[PipelineMockRequestResponsesV1],
        mock_client=mock_client,
        test_validators=[test_validators],
        test_inputs=[fhir_calls],
        temp_folder="temp",
        helix_pipeline_parameters=params,
    ).run_test2()
def __init__(
    self,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super(FrameworkTransformer, self).__init__()

    if TYPE_CHECKING:
        self._input_kwargs: Dict[str, Any] = {}

    self.logger = get_logger(__name__)

    self.name: Param[str] = Param(self, "name", "")
    self._setDefault(name=name)

    self.progress_logger: Optional[ProgressLogger] = progress_logger
    self.parameters: Optional[Dict[str, Any]] = parameters
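# The Param/_setDefault/_input_kwargs idiom these constructors share, shown
# on a minimal hypothetical subclass. self._input_kwargs is populated by
# pyspark's @keyword_only decorator (not visible in these excerpts, but
# implied by every constructor reading it), and setParams (assumed to be
# provided by the framework, since every constructor above calls it) copies
# the captured kwargs into the declared Params.
from typing import Any, Dict, Optional

from pyspark import keyword_only
from pyspark.ml.param import Param
from pyspark.sql import DataFrame


class NoopTransformer(FrameworkTransformer):
    @keyword_only
    def __init__(
        self,
        view: Optional[str] = None,
        name: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
        progress_logger: Optional[ProgressLogger] = None,
    ):
        super().__init__(
            name=name, parameters=parameters, progress_logger=progress_logger
        )
        # declare the Param, seed a default, then let setParams apply
        # whatever the caller actually passed
        self.view: Param[str] = Param(self, "view", "")
        self._setDefault(view=view)
        kwargs = self._input_kwargs
        self.setParams(**kwargs)

    def _transform(self, df: DataFrame) -> DataFrame:
        # reading a Param back out uses the standard pyspark Params API
        view: Optional[str] = self.getOrDefault(self.view)
        self.logger.info(f"NoopTransformer running against view {view}")
        return df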
def test_json_feature(spark_session: SparkSession) -> None:
    test_path: Path = Path(__file__).parent.joinpath("./")
    test_name = "test_json_feature"

    input_file = FileInput()

    logger = get_logger(__name__)

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=test_path,
        test_name=test_name,
        test_validators=[OutputFileValidator(related_inputs=input_file)],
        logger=logger,
        auto_find_helix_transformer=False,
        helix_transformers=[FeaturesPeopleJsonFeature],
        test_inputs=[input_file],
        temp_folder="output/temp",
    ).run_test2()
def test_practitioner_fail_on_fhir_validation(spark_session: SparkSession) -> None:
    test_path: Path = Path(__file__).parent.joinpath("./")

    # setup servers
    test_name = "practitioner_fail_on_fhir_validation"

    test_input = FileInput()
    test_fhir = FhirCalls()

    logger = get_logger(__name__)

    mock_server_url = "http://mock-server:1080"
    mock_client = MockServerFriendlyClient(mock_server_url)
    mock_client.clear(f"/{test_name}/")
    mock_client.expect_default()

    params = {
        "test_name": test_name,
        "mock_server_url": mock_server_url,
    }

    test_validator = FhirValidator(
        related_inputs=test_fhir,
        related_file_inputs=test_input,
        mock_server_url=mock_server_url,
        test_name=test_name,
        fhir_validation_url="http://fhir:3000/4_0_0",
    )

    with pytest.raises(AssertionError, match=r"Failed validation for resource.*"):
        SparkPipelineFrameworkTestRunnerV2(
            spark_session=spark_session,
            test_path=test_path,
            test_name=test_name,
            test_validators=[test_validator],
            logger=logger,
            auto_find_helix_transformer=False,
            helix_transformers=[
                FeaturesDoctorFeaturePractitionerFailOnFhirValidationV1
            ],
            test_inputs=[test_input],
            temp_folder="output/temp",
            helix_pipeline_parameters=params,
        ).run_test2()
def __init__(
    self,
    validation_source_path: str,
    validation_queries: List[str],
    fail_on_validation: bool = False,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
) -> None:
    """
    :param validation_source_path: the root path to the validation queries
    :param validation_queries: a list of validation query paths relative to the root path;
                               each entry can be a file path or a directory path relative to the root
    :param fail_on_validation: if True, fails the pipeline at this transformer; defaults to False
    :param name: a name for the transformer step
    :param parameters: parameters
    :param progress_logger: the logger to use for logging
    """
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    if not validation_source_path:
        raise ValueError("validation_source_path is None or empty")
    if not validation_queries:
        raise ValueError("validation_queries is None")

    self.validation_source_path: Param[str] = Param(
        self, "validation_source_path", ""
    )
    self._setDefault(validation_source_path=None)

    self.validation_queries: Param[List[str]] = Param(
        self, "validation_queries", ""
    )
    self._setDefault(validation_queries=None)

    self.fail_on_validation: Param[bool] = Param(self, "fail_on_validation", "")
    self._setDefault(fail_on_validation=False)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
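# Hypothetical usage sketch (class name FrameworkValidationTransformer
# assumed; the arguments come from the signature and docstring above):
validation = FrameworkValidationTransformer(
    validation_source_path="/opt/pipeline/validations",
    validation_queries=[
        "checks/null_ids.sql",  # a single file relative to the root path
        "checks/encounters/",   # or a whole directory of queries
    ],
    fail_on_validation=True,    # stop the pipeline on a failed check
)
validation.transform(input_df)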
def __init__(
    self,
    view: str,
    filepath: Union[str, List[str], Path],
    row_tag: str,
    schema: Optional[StructType] = None,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
) -> None:
    """
    Initializes the framework_xml_loader

    :param view: The name of the view that the resultant DataFrame will be stored in
    :param filepath: The path to the xml file to load
    :param row_tag: The row tag of your xml files to treat as a row
    :param schema: The schema to apply to the DataFrame; if not passed, the schema will be inferred
    :param name: sets the name of the transformer as it will appear in logs
    :param parameters:
    :param progress_logger:
    """
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger: Logger = get_logger(__name__)

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=None)

    self.filepath: Param[str] = Param(self, "filepath", "")
    self._setDefault(filepath=None)

    self.row_tag: Param[str] = Param(self, "row_tag", "")
    self._setDefault(row_tag=None)

    self.schema: Param[StructType] = Param(self, "schema", "")
    self._setDefault(schema=None)

    if not filepath:
        raise ValueError("filepath is None or empty")

    self.logger.info(f"Received filepath: {filepath}")

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
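# Hypothetical usage sketch (FrameworkXmlLoader is inferred from the
# docstring's "framework_xml_loader"; the import path is not shown):
xml_loader = FrameworkXmlLoader(
    view="books",
    filepath="data/books.xml",
    row_tag="book",   # each <book> element becomes one row
    schema=None,      # omit to infer the schema, per the docstring
)
xml_loader.transform(input_df)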
def __init__(
    self,
    view: str,
    columns_to_check: List[str],
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
):
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    self.view: Param[str] = Param(self, "view", "")
    self._setDefault(view=view)

    self.columns_to_check: Param[List[str]] = Param(self, "columns_to_check", "")
    self._setDefault(columns_to_check=columns_to_check)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
def test_practitioner_fail_on_schema(spark_session: SparkSession) -> None:
    with pytest.raises(SparkPipelineFrameworkTestingException):
        try:
            test_path: Path = Path(__file__).parent.joinpath("./")
            test_name = "test_practitioner_fail_on_schema"

            input_file = FileInput()

            logger = get_logger(__name__)

            SparkPipelineFrameworkTestRunnerV2(
                spark_session=spark_session,
                test_path=test_path,
                test_name=test_name,
                test_validators=[
                    OutputFileValidator(
                        related_inputs=input_file,
                        func_path_modifier=lambda x: Path(
                            str(x).replace(str(test_path), "/foo/")
                        ),
                    )
                ],
                logger=logger,
                auto_find_helix_transformer=False,
                helix_transformers=[FeaturesDoctorFeaturePractitionerFailOnSchemaV1],
                test_inputs=[input_file],
                temp_folder="output/temp",
                capture_exceptions=False,
            ).run_test2()
        except SparkPipelineFrameworkTestingException as e:
            assert len(e.exceptions) == 1
            assert e.exceptions[0].result_path == Path("/foo/output/temp/output.json")
            assert e.exceptions[0].expected_path == Path("/foo/output/output.json")
            assert e.exceptions[0].compare_path == Path(
                "/foo/output/temp/compare_schema_output.json.command"
            )
            compare_file_full_path = test_path.joinpath(
                "output/temp/compare_schema_output.json.command"
            )
            with open(compare_file_full_path, "r") as file:
                print(f"------- compare file: {compare_file_full_path} ---------")
                print(file.read())
            raise e
def __init__(
    self,
    sql: Optional[str] = None,
    view: Optional[str] = None,
    log_sql: bool = False,
    name: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    verify_count_remains_same: bool = False,
    mapping_file_name: Optional[str] = None,
) -> None:
    super().__init__(
        name=name, parameters=parameters, progress_logger=progress_logger
    )
    self.logger = get_logger(__name__)

    if not sql:
        raise ValueError("sql is None or empty")
    if not view:
        raise ValueError("view is None or empty")

    self.sql: Param[Optional[str]] = Param(self, "sql", "")
    self._setDefault(sql=None)

    self.view: Param[Optional[str]] = Param(self, "view", "")
    self._setDefault(view=None)

    self.mapping_file_name: Param[Optional[str]] = Param(
        self, "mapping_file_name", ""
    )
    self._setDefault(mapping_file_name=None)

    self.log_sql: Param[bool] = Param(self, "log_sql", "")
    self._setDefault(log_sql=False)

    self.verify_count_remains_same: Param[bool] = Param(
        self, "verify_count_remains_same", ""
    )
    self._setDefault(verify_count_remains_same=None)

    kwargs = self._input_kwargs
    self.setParams(**kwargs)
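# Hypothetical usage sketch (class name FrameworkSqlTransformer assumed; the
# view parameter suggests the query result is stored under that view name):
sql_step = FrameworkSqlTransformer(
    sql="SELECT id, name FROM patients WHERE active = true",
    view="active_patients",
    log_sql=True,                    # echo the SQL to the log before running
    verify_count_remains_same=False,
)
sql_step.transform(input_df)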
def test_fhir_mock(spark_session: SparkSession) -> None:
    test_path: Path = Path(__file__).parent.joinpath("./")
    test_name = "test_fhir_mock"
    mock_server_url = "http://mock-server:1080"
    mock_client = MockServerFriendlyClient(mock_server_url)
    mock_client.clear(f"/{test_name}/")

    fhir_calls = FhirCalls()
    test_validators = MockCallValidator(related_inputs=fhir_calls)

    params = {
        "test_name": test_name,
        "mock_server_url": mock_server_url,
        "files_path": [
            test_path.joinpath("fhir_calls/healthcare_service/1629334859-TT3-GPPC.json"),
            test_path.joinpath("fhir_calls/healthcare_service/1790914448-TT4-GPPC.json"),
            test_path.joinpath("fhir_calls/location/Medstar-Alias-TT3-GPPC.json"),
            test_path.joinpath("fhir_calls/location/Medstar-Alias-TT4-GPPC.json"),
        ],
    }

    logger = get_logger(__name__)

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=test_path,
        test_name=test_name,
        test_validators=[test_validators],
        logger=logger,
        auto_find_helix_transformer=False,
        helix_transformers=[PipelineFhirCallsFhirMockV1],
        mock_client=mock_client,
        test_inputs=[fhir_calls],
        temp_folder="temp",
        helix_pipeline_parameters=params,
    ).run_test2()
def test_doctor_feature_practitioner(spark_session: SparkSession) -> None:
    data_dir: Path = Path(__file__).parent.joinpath("./")
    test_name = "test_doctor_feature_practitioner"

    test_input = input_types.FileInput()
    test_fhir = input_types.FhirCalls()

    logger = get_logger(__name__)

    mock_server_url = "http://mock-server:1080"
    mock_client = MockServerFriendlyClient(mock_server_url)
    mock_client.clear(f"/{test_name}/")
    mock_client.expect_default()

    params = {
        "test_name": test_name,
        "mock_server_url": mock_server_url,
    }

    test_validator = FhirValidator(
        related_inputs=test_fhir,
        related_file_inputs=test_input,
        mock_server_url=mock_server_url,
        test_name=test_name,
        fhir_validation_url="http://fhir:3000/4_0_0",
    )

    SparkPipelineFrameworkTestRunnerV2(
        spark_session=spark_session,
        test_path=data_dir,
        test_name=test_name,
        test_validators=[
            test_validator,
            # OutputFileValidator(related_inputs=test_input, sort_output_by=["id"]),
        ],
        logger=logger,
        test_inputs=[test_input],
        temp_folder="output/temp",
        mock_client=mock_client,
        helix_pipeline_parameters=params,
    ).run_test2()
def __init__(
    self,
    jdbc_url: str,
    table: str,
    driver: str,
    view: Optional[str] = None,
    name: Optional[str] = None,
    mode: str = FrameworkBaseExporter.MODE_ERROR,
    parameters: Optional[Dict[str, Any]] = None,
    progress_logger: Optional[ProgressLogger] = None,
    limit: int = -1,
    options: Optional[Dict[str, Any]] = None,
):
    super().__init__(
        view=view,
        name=name,
        mode=mode,
        parameters=parameters,
        progress_logger=progress_logger,
        limit=limit,
    )

    assert jdbc_url
    assert table
    assert driver

    self.logger = get_logger(__name__)

    # treat None as an empty options dict (avoids a mutable default argument)
    options = options if options is not None else {}

    self.options: Param[Dict[str, Any]] = Param(self, "options", "")
    self._setDefault(options=options)

    self.jdbc_url: Param[str] = Param(self, "jdbc_url", "")
    self._setDefault(jdbc_url=jdbc_url)

    self.table: Param[str] = Param(self, "table", "")
    self._setDefault(table=table)

    self.driver: Param[str] = Param(self, "driver", "")
    self._setDefault(driver=driver)

    self._set(jdbc_url=jdbc_url, table=table, driver=driver, options=options)
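# Hypothetical usage sketch (class name FrameworkJdbcExporter assumed; the
# mode constants come from FrameworkBaseExporter, as the signature shows):
exporter = FrameworkJdbcExporter(
    view="active_patients",
    jdbc_url="jdbc:postgresql://db.example.com:5432/warehouse",
    table="active_patients",
    driver="org.postgresql.Driver",
    mode=FrameworkBaseExporter.MODE_ERROR,  # fail if the target table exists
    options={"batchsize": "10000"},         # passed through to the JDBC writer
)
exporter.transform(input_df)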