Example #1
0
def test_final_error_code_is_converted_to_column():
    """A transfer's final error code becomes a one-element column."""
    table = convert_transfers_to_table([build_transfer(final_error_code=5)])

    actual = table.select(["final_error_code"]).to_pydict()

    assert actual == {"final_error_code": [5]}
Example #2
0
def test_conversation_id_is_converted_to_column():
    """A transfer's conversation id becomes a one-element string column."""
    transfers = [build_transfer(conversation_id="123")]
    table = convert_transfers_to_table(transfers)

    actual = table.select(["conversation_id"]).to_pydict()

    assert actual == {"conversation_id": ["123"]}
Example #3
0
def test_sender_error_code_is_converted_to_column_when_missing():
    """A missing sender error code is represented as a null cell."""
    table = convert_transfers_to_table([build_transfer(sender_error_code=None)])

    actual = table.select(["sender_error_code"]).to_pydict()

    assert actual == {"sender_error_code": [None]}
Example #4
0
def test_sla_duration_is_converted_to_column_when_missing():
    """A missing SLA duration is represented as a null cell."""
    table = convert_transfers_to_table([build_transfer(sla_duration=None)])

    actual = table.select(["sla_duration"]).to_pydict()

    assert actual == {"sla_duration": [None]}
Example #5
0
def test_sending_practice_asid_is_converted_to_column():
    """The sending practice ASID becomes a one-element string column."""
    transfers = [build_transfer(sending_practice_asid="001112345678")]
    table = convert_transfers_to_table(transfers)

    actual = table.select(["sending_practice_asid"]).to_pydict()

    assert actual == {"sending_practice_asid": ["001112345678"]}
Example #6
0
def test_date_completed_is_converted_to_column_when_missing():
    """A missing completion date is represented as a null cell."""
    table = convert_transfers_to_table([build_transfer(date_completed=None)])

    actual = table.select(["date_completed"]).to_pydict()

    assert actual == {"date_completed": [None]}
Example #7
0
def test_status_is_converted_to_column():
    """The status enum is serialised to its name in the status column."""
    transfers = [build_transfer(status=TransferStatus.INTEGRATED)]
    table = convert_transfers_to_table(transfers)

    actual = table.select(["status"]).to_pydict()

    assert actual == {"status": ["INTEGRATED"]}
Example #8
0
def test_sla_duration_is_rounded_to_integer():
    """A fractional SLA duration is rounded to a whole number of seconds."""
    duration = timedelta(days=2, hours=1, minutes=3, seconds=6, milliseconds=1)
    table = convert_transfers_to_table([build_transfer(sla_duration=duration)])

    actual = table.select(["sla_duration"]).to_pydict()

    # 2d 1h 3m 6.001s == 176586.001s -> rounds to 176586
    assert actual == {"sla_duration": [176586]}
Example #9
0
def test_intermediate_error_codes_is_converted_to_column_when_empty():
    """No intermediate error codes yields an empty list cell, not null."""
    table = convert_transfers_to_table([build_transfer(intermediate_error_codes=[])])

    actual = table.select(["intermediate_error_codes"]).to_pydict()

    assert actual == {"intermediate_error_codes": [[]]}
Example #10
0
def test_date_completed_is_converted_to_column():
    """The completion timestamp is carried through to the date_completed column."""
    completed_at = datetime(year=2020, month=7, day=28, hour=17)
    table = convert_transfers_to_table([build_transfer(date_completed=completed_at)])

    actual = table.select(["date_completed"]).to_pydict()

    assert actual == {"date_completed": [completed_at]}
Example #11
0
def test_converts_multiple_rows_into_table():
    """Each input transfer becomes one table row, preserving input order."""
    rows = [("123", 1), ("456", 2), ("789", 3)]
    transfers = [
        build_transfer(conversation_id=cid, final_error_code=code)
        for cid, code in rows
    ]

    table = convert_transfers_to_table(transfers)
    actual = table.select(
        ["conversation_id", "final_error_code"]).to_pydict()

    assert actual == {
        "conversation_id": ["123", "456", "789"],
        "final_error_code": [1, 2, 3],
    }
Example #12
0
def test_table_has_correct_schema():
    """Column names, order and Arrow types match the expected layout."""
    expected_fields = [
        ("conversation_id", pa.string()),
        ("sla_duration", pa.uint64()),
        ("requesting_practice_asid", pa.string()),
        ("sending_practice_asid", pa.string()),
        ("requesting_supplier", pa.string()),
        ("sending_supplier", pa.string()),
        ("sender_error_code", pa.int64()),
        ("final_error_code", pa.int64()),
        ("intermediate_error_codes", pa.list_(pa.int64())),
        ("status", pa.string()),
        ("date_requested", pa.timestamp("us")),
        ("date_completed", pa.timestamp("us")),
    ]

    actual_schema = convert_transfers_to_table([build_transfer()]).schema

    assert actual_schema == pa.schema(expected_fields)
def main():
    """Run the platform-metrics pipeline: parse spine messages, compute
    practice/national metrics, and write results to local files or S3."""
    # Parse CLI args and derive the reporting time window for the given month.
    args = parse_platform_metrics_calculator_pipeline_arguments(sys.argv[1:])
    time_range = _get_time_range(args.year, args.month)

    # Load the organisation list and build typed metadata from it.
    organisation_data = read_json_file(args.organisation_list_file)
    organisation_metadata = construct_organisation_list_from_dict(data=organisation_data)

    # Parse raw spine messages into transfers restricted to the time window.
    spine_messages = _read_spine_csv_gz_files(args.input_files)
    transfers = list(parse_transfers_from_messages(spine_messages, time_range))
    practice_metrics_data = calculate_practice_metrics_data(
        transfers, organisation_metadata.practices, time_range
    )
    national_metrics_data = calculate_national_metrics_data(
        transfers=transfers, time_range=time_range
    )
    # NOTE(review): organisation_metadata is rebound here to its serialisable
    # form — the earlier typed object is no longer needed past this point.
    organisation_metadata = construct_organisation_metadata(organisation_metadata)
    transfer_table = convert_transfers_to_table(transfers)

    practice_metrics_file_name = "practiceMetrics.json"
    organisation_metadata_file_name = "organisationMetadata.json"
    national_metrics_file_name = "nationalMetrics.json"
    transfers_file_name = "transfers.parquet"

    # Output destination is mutually exclusive: local directory or S3 bucket.
    if _is_outputting_to_file(args):
        # Local output: one "<month>-<year>-<name>" file per artefact.
        _write_data_platform_json_file(
            practice_metrics_data,
            f"{args.output_directory}/{args.month}-{args.year}-{practice_metrics_file_name}",
        )
        _write_data_platform_json_file(
            organisation_metadata,
            f"{args.output_directory}/{args.month}-{args.year}-{organisation_metadata_file_name}",
        )
        _write_data_platform_json_file(
            national_metrics_data,
            f"{args.output_directory}/{args.month}-{args.year}-{national_metrics_file_name}",
        )
        write_table(
            transfer_table,
            f"{args.output_directory}/{args.month}-{args.year}-{transfers_file_name}",
        )
    elif _is_outputting_to_s3(args):
        s3 = boto3.resource("s3", endpoint_url=args.s3_endpoint_url)

        # S3 layout: <bucket>/v2/<year>/<month>/<name>.
        bucket_name = args.output_bucket
        version = "v2"
        s3_path = f"{version}/{args.year}/{args.month}"

        _upload_data_platform_json_object(
            practice_metrics_data,
            s3.Object(bucket_name, f"{s3_path}/{practice_metrics_file_name}"),
        )
        _upload_data_platform_json_object(
            organisation_metadata,
            s3.Object(bucket_name, f"{s3_path}/{organisation_metadata_file_name}"),
        )
        _upload_data_platform_json_object(
            national_metrics_data,
            s3.Object(bucket_name, f"{s3_path}/{national_metrics_file_name}"),
        )
        # Parquet is written directly against the S3 filesystem, not uploaded
        # as a pre-serialised object like the JSON artefacts above.
        write_table(
            table=transfer_table,
            where=bucket_name + "/" + f"{s3_path}/{transfers_file_name}",
            filesystem=S3FileSystem(endpoint_override=args.s3_endpoint_url),
        )