def test_api_failure():
    """Run the ``API_FAILURE`` case through ``api_parallelizer``.

    A one-row dataframe holding ``APICaseEnum.API_FAILURE`` is passed to the
    parallelizer together with ``call_mock_api``. Every key of the case's
    expected value must then be present in the first output row with an
    equal value.
    """
    failure_case = APICaseEnum.API_FAILURE
    single_row_df = pd.DataFrame({INPUT_COLUMN: [failure_case]})
    result_df = api_parallelizer(
        input_df=single_row_df,
        api_call_function=call_mock_api,
        api_exceptions=API_EXCEPTIONS,
        column_prefix=COLUMN_PREFIX,
    )
    first_row = result_df.iloc[0, :].to_dict()
    expected = failure_case.value
    # Only the keys of the expected value are compared; any other output
    # column is ignored.
    for key in expected:
        assert first_row[key] == expected[key]
def test_invalid_input():
    """Run the ``INVALID_INPUT`` case through ``api_parallelizer``.

    Same flow as the other case tests, except that the call also passes
    ``api_function_param="invalid_integer"``. Every key of the case's
    expected value must be present in the first output row with an equal
    value.
    """
    invalid_case = APICaseEnum.INVALID_INPUT
    single_row_df = pd.DataFrame({INPUT_COLUMN: [invalid_case]})
    result_df = api_parallelizer(
        input_df=single_row_df,
        api_call_function=call_mock_api,
        api_exceptions=API_EXCEPTIONS,
        column_prefix=COLUMN_PREFIX,
        api_function_param="invalid_integer",
    )
    first_row = result_df.iloc[0, :].to_dict()
    expected = invalid_case.value
    # Only the keys of the expected value are compared; any other output
    # column is ignored.
    for key in expected:
        assert first_row[key] == expected[key]
        input_folder_bucket=plugin_params.input_folder_bucket,
        input_folder_root_path=plugin_params.input_folder_root_path,
    )
    return response_json


# ==============================================================================
# RUN
# ==============================================================================

# Call API in parallel
# api_parallelizer is given the input dataframe, the per-call function
# (call_api_moderation) and the exception types in API_EXCEPTIONS. The other
# keyword arguments are read straight from plugin_params.
# NOTE(review): minimum_score is presumably forwarded to call_api_moderation
# by the parallelizer - confirm against api_parallelizer's signature.
df = api_parallelizer(
    input_df=plugin_params.input_df,
    api_call_function=call_api_moderation,
    api_exceptions=API_EXCEPTIONS,
    parallel_workers=plugin_params.parallel_workers,
    error_handling=plugin_params.error_handling,
    minimum_score=plugin_params.minimum_score,
    column_prefix=column_prefix,
)

# Format API results
# The formatter is built from the same plugin_params (category level plus the
# top-level and second-level category lists) and uses the same column_prefix
# and error_handling as the call above. format_df(df) produces output_df from
# the parallelizer result.
api_formatter = UnsafeContentAPIFormatter(
    input_df=plugin_params.input_df,
    category_level=plugin_params.unsafe_content_category_level,
    content_categories_top_level=plugin_params.unsafe_content_categories_top_level,
    content_categories_second_level=plugin_params.unsafe_content_categories_second_level,
    error_handling=plugin_params.error_handling,
    column_prefix=column_prefix,
)
output_df = api_formatter.format_df(df)
        input_folder_root_path=plugin_params.input_folder_root_path,
    )
    return response_json


# ==============================================================================
# RUN
# ==============================================================================

# Call API in parallel
# api_parallelizer is given the input dataframe, the per-call function
# (call_api_object_detection) and the exception types in API_EXCEPTIONS.
# NOTE(review): num_objects, minimum_score and orientation_correction are
# presumably forwarded to call_api_object_detection - confirm against
# api_parallelizer's signature.
df = api_parallelizer(
    input_df=plugin_params.input_df,
    api_call_function=call_api_object_detection,
    api_exceptions=API_EXCEPTIONS,
    parallel_workers=plugin_params.parallel_workers,
    error_handling=plugin_params.error_handling,
    num_objects=plugin_params.num_objects,
    minimum_score=plugin_params.minimum_score,
    orientation_correction=plugin_params.orientation_correction,
    column_prefix=column_prefix,
)

# Format API results
api_formatter = ObjectDetectionLabelingAPIFormatter(
    input_df=plugin_params.input_df,
    num_objects=plugin_params.num_objects,
    orientation_correction=plugin_params.orientation_correction,
    input_folder=plugin_params.input_folder,
    error_handling=plugin_params.error_handling,
    parallel_workers=plugin_params.parallel_workers,
    column_prefix=column_prefix,
# Example no. 5
# 0
                                           type=DOCUMENT_TYPE)
        if entity_sentiment:
            response = client.analyze_entity_sentiment(
                document=document, encoding_type=ENCODING_TYPE)
        else:
            response = client.analyze_entities(document=document,
                                               encoding_type=ENCODING_TYPE)
        return MessageToJson(response)


# Call the named entity recognition API through api_parallelizer, using
# call_api_named_entity_recognition as the per-call function.
# NOTE(review): text_column, text_language and entity_sentiment are presumably
# forwarded to that function - confirm against api_parallelizer's signature.
df = api_parallelizer(
    input_df=input_df,
    api_call_function=call_api_named_entity_recognition,
    api_exceptions=API_EXCEPTIONS,
    column_prefix=column_prefix,
    parallel_workers=parallel_workers,
    error_handling=error_handling,
    text_column=text_column,
    text_language=text_language,
    entity_sentiment=entity_sentiment,
)

# Build the formatter with the same input_df, column_prefix and error_handling
# as the call above, plus entity_types and minimum_score. format_df(df)
# produces output_df from the parallelizer result.
api_formatter = NamedEntityRecognitionAPIFormatter(
    input_df=input_df,
    column_prefix=column_prefix,
    entity_types=entity_types,
    minimum_score=minimum_score,
    error_handling=error_handling,
)
output_df = api_formatter.format_df(df)
# Example no. 6
# 0
    text = row[text_column]
    if not isinstance(text, str) or str(text).strip() == "":
        return ""
    else:
        document = language.types.Document(content=text,
                                           language=text_language,
                                           type=DOCUMENT_TYPE)
        response = client.classify_text(document=document)
        return MessageToJson(response)


# Call the text classification API through api_parallelizer, using
# call_api_text_classification as the per-call function.
# NOTE(review): text_column and text_language are presumably forwarded to that
# function - confirm against api_parallelizer's signature.
df = api_parallelizer(
    input_df=input_df,
    api_call_function=call_api_text_classification,
    api_exceptions=API_EXCEPTIONS,
    column_prefix=column_prefix,
    parallel_workers=parallel_workers,
    error_handling=error_handling,
    text_column=text_column,
    text_language=text_language,
)

# Build the formatter with the same input_df, column_prefix and error_handling
# as the call above, plus num_categories. format_df(df) produces output_df
# from the parallelizer result.
api_formatter = TextClassificationAPIFormatter(
    input_df=input_df,
    column_prefix=column_prefix,
    num_categories=num_categories,
    error_handling=error_handling,
)
output_df = api_formatter.format_df(df)

# Write the formatted dataframe to the output dataset.
output_dataset.write_with_schema(output_df)
set_column_description(