def test_autoannotate():
    """Only the confident prediction (id 0) should get a machine label."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {"id": 0, "source": "This text is about a dog."},
        {"id": 1, "source": "This text is not about animals."},
    ]
    predictions = [
        {"id": 0, "prob": [0.9, 0.1], "label": ["__label__0", "__label__1"]},
        {"id": 1, "prob": [0.6, 0.4], "label": ["__label__0", "__label__1"]},
    ]

    annotations = learner.autoannotate(predictions, sources)

    assert len(annotations) == 1
    annotation = annotations[0]
    assert annotation["id"] == 0
    assert annotation["animal"] == 0
    metadata = annotation["animal-metadata"]
    assert metadata["class-name"] == "dog"
    assert metadata["human-annotated"] == "no"
def test_select_for_labeling():
    """The low-confidence example (id 1) should be picked for human labeling."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {"id": 0, "source": "This text is about a dog."},
        {"id": 1, "source": "This text is not about animals."},
    ]
    predictions = [
        {"id": 0, "prob": [0.9, 0.1], "label": ["__label__0", "__label__1"]},
        {"id": 1, "prob": [0.6, 0.4], "label": ["__label__0", "__label__1"]},
    ]

    annotations = learner.autoannotate(predictions, sources)
    selected = learner.select_for_labeling(predictions, annotations)

    assert len(selected) == 1
    assert selected[0] == 1
def test_autoannotate_confident_prediction():
    """Only the confident prediction (id 0) should get a machine label.

    NOTE(review): this function was originally also named
    ``test_autoannotate``, duplicating an earlier test in this module.
    A second ``def`` with the same name rebinds the module attribute, so
    pytest silently collected only one of the two tests. Renamed so both
    are discovered and run.
    """
    al = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {
            "id": 0,
            "source": "This text is about a dog."
        },
        {
            "id": 1,
            "source": "This text is not about animals."
        },
    ]
    predictions = [
        {
            "id": 0,
            "prob": [0.9, 0.1],
            "label": ["__label__0", "__label__1"]
        },
        {
            "id": 1,
            "prob": [0.6, 0.4],
            "label": ["__label__0", "__label__1"]
        },
    ]

    autoannotations = al.autoannotate(predictions, sources)
    # Only id 0 clears the confidence bar; its machine label is class 0
    # ("dog") and is marked as not human-annotated.
    assert len(autoannotations) == 1
    assert autoannotations[0]["id"] == 0
    assert autoannotations[0]["animal"] == 0
    assert autoannotations[0]["animal-metadata"]["class-name"] == "dog"
    assert autoannotations[0]["animal-metadata"]["human-annotated"] == "no"
def lambda_handler(event, context):
    """Generate auto-annotations and run one round of active learning.

    Auto-annotation assigns machine labels to confident examples;
    active learning selects the examples humans should label next.
    Returns the (mutated) ``meta_data`` dict from *event*, augmented
    with annotation/selection URIs, the next job id, and counts.
    """
    meta_data = event["meta_data"]
    labels_s3_uri = event["LabelCategoryConfigS3Uri"]
    job_name_prefix = event["LabelingJobNamePrefix"]
    label_attribute_name = event["LabelAttributeName"]
    job_name = "labeling-job/{}".format(job_name_prefix)
    intermediate_folder_uri = meta_data["IntermediateFolderUri"]

    input_total = int(meta_data["counts"]["input_total"])
    # Cap manual labeling at 10% of the input; when integer division
    # rounds that down to 0, allow the whole input instead.
    max_selections = (input_total // 10) or input_total

    inference_input_s3_ref, inference_input, sources = collect_inference_inputs(
        meta_data["UnlabeledS3Uri"])
    predictions = collect_inference_outputs(
        meta_data["transform_config"]["S3OutputPath"])
    label_names = get_label_names_from_s3(labels_s3_uri)
    logger.info("Collected {} label names.".format(len(label_names)))

    learner = SimpleActiveLearning(job_name, label_attribute_name,
                                   label_names, max_selections)
    meta_data["autoannotations"], auto_annotations = write_auto_annotations(
        learner, sources, predictions, inference_input_s3_ref)
    meta_data["selections_s3_uri"], selections = write_selector_file(
        learner, sources, predictions, inference_input_s3_ref,
        inference_input, auto_annotations)

    next_job_name, next_job_output_uri = generate_job_id_and_s3_path(
        job_name_prefix, intermediate_folder_uri)
    meta_data["selected_job_name"] = next_job_name
    meta_data["selected_job_output_uri"] = next_job_output_uri

    counts = meta_data["counts"]
    counts["autoannotated"] = len(auto_annotations)
    counts["selected"] = len(selections)
    return meta_data
def test_compute_margin_high_confidence():
    """A wide probability gap yields a high margin for the top label."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)

    margin, best_label = learner.compute_margin([0.9, 0.1], ["dog", "cat"])

    assert best_label == "dog"
    assert margin == pytest.approx(0.8)
def test_get_label_index():
    """``__label__N`` strings map to their integer class index."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)

    assert learner.get_label_index("__label__0") == 0
    assert learner.get_label_index("__label__1") == 1
def test_compute_margin_low_confidence():
    """A narrow probability gap yields a low margin for the top label."""
    learner = SimpleActiveLearning("test", "animal", ['dog', 'cat'], 1000)

    margin, best_label = learner.compute_margin([0.6, 0.4], ['dog', 'cat'])

    assert best_label == 'dog'
    assert margin == pytest.approx(0.2)