def test_auto_mapper_string_after_delimiter(spark_session: SparkSession) -> None:
    """Check ``A.string_after_delimiter`` against a small "patients" view.

    The generated column spec is compared with
    ``substring_index(col("b.last_name"), "/", -1)``, and the transformed
    rows are checked: "Qure/hi" yields "hi", while "Vidal" (which contains
    no "/") comes back as "Vidal".
    """
    # Arrange
    patient_rows = [
        (1, "Qure/hi", "Imran", "1970-01-01"),
        (2, "Vidal", "Michael", "1970-02-02"),
    ]
    patient_columns = ["member_id", "last_name", "first_name", "date_of_birth"]
    patients_df = spark_session.createDataFrame(patient_rows, patient_columns)
    patients_df.createOrReplaceTempView("patients")

    source_df: DataFrame = spark_session.table("patients")

    # The destination view starts out holding only the key column.
    df = source_df.select("member_id")
    df.createOrReplaceTempView("members")

    # Act
    base_mapper = AutoMapper(
        view="members", source_view="patients", keys=["member_id"]
    )
    mapper = base_mapper.columns(
        my_column=A.string_after_delimiter(A.column("last_name"), "/")
    )
    assert isinstance(mapper, AutoMapper)

    sql_expressions: Dict[str, Column] = mapper.get_column_specs(source_df=source_df)
    for name, expression in sql_expressions.items():
        print(f"{name}: {expression}")

    actual_expression = sql_expressions["my_column"]
    expected_expression = substring_index(col("b.last_name"), "/", -1).alias(
        "my_column"
    )
    assert_compare_expressions(actual_expression, expected_expression)

    result_df: DataFrame = mapper.transform(df=df)

    # Assert
    result_df.printSchema()
    result_df.show()

    expected_by_filter = (
        ("member_id == 1", "hi"),
        ("member_id == 2", "Vidal"),
    )
    for row_filter, expected_value in expected_by_filter:
        matching_rows = result_df.where(row_filter).select("my_column").collect()
        assert matching_rows[0][0] == expected_value
def test_auto_mapper_hir_period_uses_date(spark_session: SparkSession) -> None:
    """Map an encounter JSON fixture to a FHIR ``Encounter`` and check the period.

    The fixture ``test_files/encounter.json`` is converted with
    ``create_jsonl_files`` into a temp folder, loaded as the "encounters"
    view, and mapped into the "fhir_encounters" view with ``use_date_for``
    set for the period start and end. The test then asserts that the first
    row's ``period.start`` and ``period.end`` are ``datetime.date`` instances.
    """
    # Arrange: start from a clean temp folder next to this test file.
    data_dir: Path = Path(__file__).parent.joinpath("./")
    temp_folder = data_dir.joinpath("./temp")
    if path.isdir(temp_folder):
        rmtree(temp_folder)

    encounter_test_folder: Path = data_dir.joinpath("test_files", "encounter.json")
    minified_json_path: Path = create_jsonl_files(
        src_file=encounter_test_folder,
        dst_folder=temp_folder.joinpath("minified_period"),
        dst_file_name="1.json",
    )

    df = spark_session.read.json(str(minified_json_path))
    df.createOrReplaceTempView("encounters")

    # Act: the pieces below are built in the same order in which the
    # Encounter keyword arguments are passed.
    base_mapper = AutoMapper(
        view="fhir_encounters",
        source_view="encounters",
        copy_all_unmapped_properties=True,
    )

    encounter_id = FhirId(A.concat("pat", A.column("id")))
    encounter_status = EncounterStatusCode(A.column("status"))
    encounter_class = Coding(
        system=A.column("class.system"),
        code=ActEncounterCode(A.column("class.code")),
        display=A.column("class.display"),
    )
    encounter_subject = Reference(
        display=A.column("subject.display"),
        reference=FhirReference(
            resource="Patient",
            column=A.concat(
                "pat",
                A.string_after_delimiter(A.column("subject.reference"), "/"),
            ),
        ),
    )
    encounter_period = Period(
        start=A.column("period.start"), end=A.column("period.end")
    )
    encounter_participants = FhirList(
        A.column("participant").select(  # type: ignore
            EncounterParticipant(
                individual=Reference[Union[Practitioner]](  # type: ignore
                    display=A.field("individual.display"),
                    reference=FhirReference(
                        resource="Practitioner",
                        column=A.concat(
                            "pat",
                            A.string_after_delimiter(
                                A.field("individual.reference"), "/"
                            ),
                        ),
                    ),
                ),
                type_=FhirList(
                    A.field("type").select(  # type: ignore
                        CodeableConcept(
                            coding=FhirList(  # type: ignore
                                A.field("coding").select(  # type: ignore
                                    Coding(
                                        system=A.field("system"),
                                        code=ParticipantTypeCode(A.field("code")),
                                        display=A.field("display"),
                                    )
                                )
                            ),
                            text=A.field("text"),
                        ),
                    )
                ),
                period=Period(
                    start=A.field("period.start"),
                ),
            ),
        ),
    )

    mapper = base_mapper.complex(
        Encounter(
            use_date_for=["encounter.period.start", "encounter.period.end"],
            id_=encounter_id,
            status=encounter_status,
            class_=encounter_class,
            subject=encounter_subject,
            period=encounter_period,
            participant=encounter_participants,
        )
    )
    assert isinstance(mapper, AutoMapper)

    result_df: DataFrame = mapper.transform(df=df)
    assert result_df

    # Assert: read the mapped view back and inspect the first row's period.
    fhir_encounters_df = df.sql_ctx.table("fhir_encounters")

    # NOTE(review): assuming `datetime` here is the stdlib module, these checks
    # also pass for datetime.datetime values (datetime is a subclass of date),
    # so they do not by themselves rule out a timestamp column - confirm intent.
    first_period_start = fhir_encounters_df.select(
        fhir_encounters_df.period.start
    ).collect()[0][0]
    assert isinstance(first_period_start, datetime.date)

    first_period_end = fhir_encounters_df.select(
        fhir_encounters_df.period.end
    ).collect()[0][0]
    assert isinstance(first_period_end, datetime.date)