def test_autoannotate():
    # NOTE(review): a second function with this exact name is defined later
    # in this file; pytest only collects the later definition, so this test
    # is shadowed and never runs — consider renaming or removing one copy.
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {"id": 0, "source": "This text is about a dog."},
        {"id": 1, "source": "This text is not about animals."},
    ]
    predictions = [
        {"id": 0, "prob": [0.9, 0.1], "label": ["__label__0", "__label__1"]},
        {"id": 1, "prob": [0.6, 0.4], "label": ["__label__0", "__label__1"]},
    ]
    annotations = learner.autoannotate(predictions, sources)
    assert len(annotations) == 1
    record = annotations[0]
    assert record["id"] == 0
    assert record["animal"] == 0
    metadata = record["animal-metadata"]
    assert metadata["class-name"] == "dog"
    assert metadata["human-annotated"] == "no"
def test_select_for_labeling():
    """Only the low-confidence example (id 1) is selected for human labeling."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {"id": 0, "source": "This text is about a dog."},
        {"id": 1, "source": "This text is not about animals."},
    ]
    predictions = [
        {"id": 0, "prob": [0.9, 0.1], "label": ["__label__0", "__label__1"]},
        {"id": 1, "prob": [0.6, 0.4], "label": ["__label__0", "__label__1"]},
    ]
    annotations = learner.autoannotate(predictions, sources)
    chosen = learner.select_for_labeling(predictions, annotations)
    assert len(chosen) == 1
    assert chosen[0] == 1
def test_autoannotate():
    """The high-confidence example (id 0) is auto-labeled as 'dog' by machine."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    sources = [
        {"id": 0, "source": "This text is about a dog."},
        {"id": 1, "source": "This text is not about animals."},
    ]
    predictions = [
        {"id": 0, "prob": [0.9, 0.1], "label": ["__label__0", "__label__1"]},
        {"id": 1, "prob": [0.6, 0.4], "label": ["__label__0", "__label__1"]},
    ]
    annotations = learner.autoannotate(predictions, sources)
    assert len(annotations) == 1
    record = annotations[0]
    assert record["id"] == 0
    assert record["animal"] == 0
    metadata = record["animal-metadata"]
    assert metadata["class-name"] == "dog"
    assert metadata["human-annotated"] == "no"
def lambda_handler(event, context):
    """
    Generate auto annotations and perform active learning.

    - Auto annotation assigns machine labels to confidently predicted examples.
    - Active learning selects the examples humans should label next.

    Returns the updated ``meta_data`` dict with annotation/selection locations,
    the next job's name and output URI, and updated counts.
    """
    labels_s3_uri = event["LabelCategoryConfigS3Uri"]
    job_name_prefix = event["LabelingJobNamePrefix"]
    job_name = f"labeling-job/{job_name_prefix}"
    label_attribute_name = event["LabelAttributeName"]
    meta_data = event["meta_data"]
    intermediate_folder_uri = meta_data["IntermediateFolderUri"]
    input_total = int(meta_data["counts"]["input_total"])

    # Cap the next round of manual labeling at 10% of the input total.
    # When integer division yields 0, fall back to selecting up to the
    # whole input (same corner-case handling as the original `if`).
    max_selections = input_total // 10 or input_total

    inference_input_s3_ref, inference_input, sources = collect_inference_inputs(
        meta_data["UnlabeledS3Uri"])
    predictions = collect_inference_outputs(
        meta_data["transform_config"]["S3OutputPath"])

    label_names = get_label_names_from_s3(labels_s3_uri)
    logger.info(f"Collected {len(label_names)} label names.")

    simple_al = SimpleActiveLearning(
        job_name, label_attribute_name, label_names, max_selections)

    meta_data["autoannotations"], auto_annotations = write_auto_annotations(
        simple_al, sources, predictions, inference_input_s3_ref)
    meta_data["selections_s3_uri"], selections = write_selector_file(
        simple_al, sources, predictions, inference_input_s3_ref,
        inference_input, auto_annotations)
    (
        meta_data["selected_job_name"],
        meta_data["selected_job_output_uri"],
    ) = generate_job_id_and_s3_path(job_name_prefix, intermediate_folder_uri)

    meta_data["counts"]["autoannotated"] = len(auto_annotations)
    meta_data["counts"]["selected"] = len(selections)
    return meta_data
def test_compute_margin_high_confidence():
    """A 0.9/0.1 probability split yields a 0.8 margin in favor of 'dog'."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    margin, winner = learner.compute_margin([0.9, 0.1], ["dog", "cat"])
    assert winner == "dog"
    assert margin == pytest.approx(0.8)
def test_get_label_index():
    """Labels of the form ``__label__N`` map to the integer index N."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    for index, label in enumerate(["__label__0", "__label__1"]):
        assert learner.get_label_index(label) == index
def test_compute_margin_low_confidence():
    """A 0.6/0.4 probability split yields only a 0.2 margin for 'dog'."""
    learner = SimpleActiveLearning("test", "animal", ["dog", "cat"], 1000)
    margin, winner = learner.compute_margin([0.6, 0.4], ["dog", "cat"])
    assert winner == "dog"
    assert margin == pytest.approx(0.2)