示例#1
0
    def test_one_sided(self):
        """Normalization trims a span from one side only (prefix or suffix)."""
        text = "a&p:\n # dm2 "
        tokens = tokenizer_lib.tokenize(text)

        # Both inputs normalize to "dm2" at chars [8, 11).
        cases = [
            (8, 12),  # "dm2 " - remove suffix only.
            (3, 11),  # ":\n # dm2" - remove prefix only.
        ]
        expected = ap_parsing_lib.LabeledCharSpan(
            start_char=8,
            end_char=11,
            span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)  # "dm2"
        for start_char, end_char in cases:
            labeled_char_span = ap_parsing_lib.LabeledCharSpan(
                start_char=start_char,
                end_char=end_char,
                span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)
            self.assertEqual(
                ap_parsing_utils.normalize_labeled_char_span(
                    labeled_char_span, tokens), expected)
示例#2
0
    def test_usage(self):
        """Normalizes an iterable of spans and drops invalid (space-only) ones."""
        #       0   12     34     56 78   90
        text = "some longer tokens in this test"
        #       0123456789012345678901234567890
        #       0         1         2         3

        tokens = tokenizer_lib.tokenize(text)

        def unknown_span(start_char, end_char):
            # All spans in this test share the UNKNOWN_TYPE label.
            return ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE,
                start_char=start_char,
                end_char=end_char)

        labeled_char_spans = [
            unknown_span(5, 18),   # "longer tokens" - already normalized.
            unknown_span(14, 25),  # "kens in thi" -> "tokens in this"
            unknown_span(18, 19),  # Invalid - only space.
        ]
        expected = [
            unknown_span(5, 18),   # "longer tokens"
            unknown_span(12, 26),  # "tokens in this"
        ]
        self.assertEqual(
            ap_parsing_utils.normalize_labeled_char_spans_iterable(
                labeled_char_spans, tokens), expected)
示例#3
0
    def test_labeled_char_spans_to_token_spans(self):
        """Round-trips labeled spans between char and token coordinates."""
        #  Char space:
        #       0         1          2          3
        #       01234567890123456 78901234 56789012345
        text = "# DM2: on insulin\n # COPD\n- nebs prn"
        # Token:012 3456 78       90123    4567   89
        #       0                  1
        tokens = tokenizer_lib.tokenize(text)

        # (span_type, start_char, end_char, start_token, end_token)
        cases = [
            (ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE, 2, 17, 2,
             9),  # "DM2: on insulin"
            (ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE, 21, 25, 13,
             14),  # "COPD"
            (ap_parsing_lib.LabeledSpanType.ACTION_ITEM, 28, 32, 17,
             18),  # "nebs"
        ]

        labeled_char_spans = [
            ap_parsing_lib.LabeledCharSpan(
                span_type=span_type, start_char=start_char, end_char=end_char)
            for span_type, start_char, end_char, _, _ in cases
        ]
        labeled_token_spans = [
            ap_parsing_lib.LabeledTokenSpan(
                span_type=span_type,
                start_token=start_token,
                end_token=end_token)
            for span_type, _, _, start_token, end_token in cases
        ]

        # Char -> token direction.
        self.assertEqual(labeled_token_spans, [
            ap_parsing_utils.labeled_char_span_to_labeled_token_span(
                labeled_char_span, tokens=tokens)
            for labeled_char_span in labeled_char_spans
        ])

        # Token -> char direction.
        self.assertEqual(labeled_char_spans, [
            ap_parsing_utils.labeled_token_span_to_labeled_char_span(
                labeled_token_span, tokens=tokens)
            for labeled_token_span in labeled_token_spans
        ])
    def build(cls, text, labeled_char_spans):
        """Builds structured AP inplace from text and labels.

    Bundles together labels into clusters based on problem titles.

    Args:
      text: str, text of A&P section
      labeled_char_spans: LabeledCharSpans, which are converted to cluster
        fragments.

    Returns:
      An instance of StructuredAP.
    """

        tokens = tokenizer_lib.tokenize(text)

        # Normalization may drop invalid spans (e.g. whitespace-only ones),
        # so the list can come back shorter - or empty.
        labeled_char_spans = ap_parsing_utils.normalize_labeled_char_spans_iterable(
            labeled_char_spans, tokens)
        labeled_char_spans.sort(key=lambda x: x.start_char)

        structured_ap = cls(problem_clusters=list(), prefix_text="")
        structured_ap._parse_problem_clusters(labeled_char_spans, text)  # pylint: disable=protected-access

        # Everything before the first labeled span is kept as prefix text.
        # Guard the empty case: previously labeled_char_spans[0] raised
        # IndexError when no spans survived normalization; now the whole
        # text is treated as prefix.
        prefix_end_char = (
            labeled_char_spans[0].start_char if labeled_char_spans else len(text))
        prefix_text_span = ap_parsing_utils.normalize_labeled_char_span(
            ap_parsing_lib.LabeledCharSpan(
                start_char=0,
                end_char=prefix_end_char,
                span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE), tokens)
        # normalize_labeled_char_span may return a falsy value for an empty /
        # invalid prefix span; fall back to "".
        structured_ap.prefix_text = (
            text[prefix_text_span.start_char:prefix_text_span.end_char]
            if prefix_text_span else "")

        return structured_ap
示例#5
0
    def test_get_converted_labels(self):
        """Checks per-token fragment-type and action-item-type label arrays."""
        ap_text = "\n".join([
            "50 yo m with hx of copd, dm2",
            "#. COPD ex: started on abx in ED.", "  - continue abx."
        ])
        tokens = tokenizer_lib.tokenize(ap_text)

        labels = [
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                start_char=32,
                end_char=39),  # span_text="COPD ex"
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
                start_char=41,
                end_char=63),  # span_text="started on abx in ED.\n"
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                start_char=67,
                end_char=80,
                action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS
            ),  # span_text="continue abx."
        ]

        converted_labels = data_lib.generate_model_labels(labels, tokens)

        num_tokens = 45
        expected_fragment_labels = np.zeros(num_tokens)
        expected_fragment_labels[21] = 1     # B-PT COPD ex
        expected_fragment_labels[22:24] = 2  # I-PT COPD ex
        expected_fragment_labels[26] = 3     # B-PD started on abx in ED
        expected_fragment_labels[27:35] = 4  # I-PD started on abx in ED
        expected_fragment_labels[41] = 5     # B-AI continue abx
        expected_fragment_labels[42:44] = 6  # I-AI continue abx

        expected_ai_labels = np.zeros(num_tokens)
        expected_ai_labels[41:44] = 1  # continue abx - medications

        self.assertAllEqual(converted_labels["fragment_type"],
                            expected_fragment_labels)
        self.assertAllEqual(converted_labels["action_item_type"],
                            expected_ai_labels)
示例#6
0
 def test_metadata(self):
     """Normalization preserves metadata fields (action_item_type)."""
     text = "a&p:\n - nebs "
     tokens = tokenizer_lib.tokenize(text)
     span = ap_parsing_lib.LabeledCharSpan(
         start_char=3,
         end_char=11,
         span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
         action_item_type=ap_parsing_lib.ActionItemType.
         MEDICATIONS  # ":\n - neb"
     )
     normalized = ap_parsing_utils.normalize_labeled_char_span(span, tokens)
     self.assertEqual(
         normalized,
         ap_parsing_lib.LabeledCharSpan(
             start_char=8,
             end_char=12,
             span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
             action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS)
     )  # "nebs"
示例#7
0
def convert_ratings(csv_labeled_char_span):
    """Converts labeled spans from CSV to LabeledCharSpan object."""
    span_type = ap_parsing_lib.LabeledSpanType[csv_labeled_char_span.span_type]
    labeled_char_span = ap_parsing_lib.LabeledCharSpan(
        start_char=csv_labeled_char_span.char_start,
        end_char=csv_labeled_char_span.char_end,
        span_type=span_type)

    # The action item type column is optional; only set it when present.
    action_item_type = csv_labeled_char_span.action_item_type
    if action_item_type:
        labeled_char_span.action_item_type = ap_parsing_lib.ActionItemType[
            action_item_type]

    return (str(csv_labeled_char_span.note_id), labeled_char_span)
示例#8
0
    def test_compile(self):
        """Compiles a StructuredAP back into its textual representation."""

        def _fragment(span_type, start_char, end_char, text, prefix_delim,
                      suffix_delim):
            # Small factory to keep the fixture readable.
            return aug_lib.ProblemClusterFragment(
                labeled_char_span=ap_parsing_lib.LabeledCharSpan(
                    span_type=span_type,
                    start_char=start_char,
                    end_char=end_char),
                text=text,
                prefix_delim=prefix_delim,
                suffix_delim=suffix_delim)

        structured_ap = aug_lib.StructuredAP(
            prefix_text="50 yo m with hx of dm2, copd",
            problem_clusters=[
                aug_lib.ProblemCluster(fragments=[
                    _fragment(ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                              29, 32, "dm2", "\n*. ", ": "),
                    _fragment(
                        ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
                        34, 44, "on insulin", "", ""),
                    _fragment(ap_parsing_lib.LabeledSpanType.ACTION_ITEM, 47,
                              51, "RISS", "\n- ", ""),
                ])
            ])

        expected = "50 yo m with hx of dm2, copd\n*. dm2: on insulin\n- RISS"
        result, _ = structured_ap.compile()
        self.assertEqual(result, expected)
示例#9
0
    def test_build(self):
        """Builds a StructuredAP from raw text plus labeled char spans."""
        ap = "\n".join(
            ["50 yo m with hx of dm2, copd", "dm2: on insulin", "- RISS"])

        def _span(span_type, start_char, end_char):
            return ap_parsing_lib.LabeledCharSpan(
                span_type=span_type, start_char=start_char, end_char=end_char)

        labeled_char_spans = [
            _span(ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE, 29, 32),
            _span(ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION, 34, 44),
            _span(ap_parsing_lib.LabeledSpanType.ACTION_ITEM, 47, 51),
        ]

        # build() should attach the default delimiters per fragment type.
        expected = aug_lib.StructuredAP(
            prefix_text="50 yo m with hx of dm2, copd",
            problem_clusters=[
                aug_lib.ProblemCluster(fragments=[
                    aug_lib.ProblemClusterFragment(
                        labeled_char_span=_span(
                            ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE, 29,
                            32),
                        text="dm2",
                        prefix_delim=aug_lib._DefaultDelims.
                        PROBLEM_TITLE_PREFIX,
                        suffix_delim=aug_lib._DefaultDelims.
                        PROBLEM_TITLE_SUFFIX),
                    aug_lib.ProblemClusterFragment(
                        labeled_char_span=_span(
                            ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
                            34, 44),
                        text="on insulin",
                        prefix_delim=aug_lib._DefaultDelims.
                        PROBLEM_DESCRIPTION_PREFIX,
                        suffix_delim=""),
                    aug_lib.ProblemClusterFragment(
                        labeled_char_span=_span(
                            ap_parsing_lib.LabeledSpanType.ACTION_ITEM, 47,
                            51),
                        text="RISS",
                        prefix_delim=aug_lib._DefaultDelims.ACTION_ITEM_PREFIX,
                        suffix_delim=""),
                ])
            ])

        structured_ap = aug_lib.StructuredAP.build(ap, labeled_char_spans)
        self.assertEqual(structured_ap, expected)
示例#10
0
def problem_cluster_to_labeled_char_spans(problem_cluster):
    """Convert regex annotator output to labeled char spans.

  Args:
    problem_cluster: ProblemCluster object containing character spans.

  Returns:
    A list of LabeledCharSpan corresponding to the spans in the cluster.
  """

    # Problem title.
    title_start, title_end = problem_cluster.problem_title
    labeled_char_spans = [
        ap_parsing_lib.LabeledCharSpan(
            span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
            start_char=title_start,
            end_char=title_end)
    ]

    # Problem descriptions; spans with start_char <= 0 are skipped.
    labeled_char_spans.extend(
        ap_parsing_lib.LabeledCharSpan(
            span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
            start_char=pd_start,
            end_char=pd_end)
        for pd_start, pd_end in problem_cluster.problem_description
        if pd_start > 0)

    # Action items.
    labeled_char_spans.extend(
        ap_parsing_lib.LabeledCharSpan(
            span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
            start_char=ai_start,
            end_char=ai_end)
        for ai_start, ai_end in problem_cluster.action_items)

    return labeled_char_spans
示例#11
0
    def test_compile_with_labels(self):
        """compile() returns the text plus spans remapped onto that text."""

        def _fragment(span_type, start_char, end_char, text, prefix_delim,
                      suffix_delim):
            # Input fragments keep spans from the *original* text.
            return aug_lib.ProblemClusterFragment(
                labeled_char_span=ap_parsing_lib.LabeledCharSpan(
                    span_type=span_type,
                    start_char=start_char,
                    end_char=end_char),
                text=text,
                prefix_delim=prefix_delim,
                suffix_delim=suffix_delim)

        structured_ap = aug_lib.StructuredAP(
            prefix_text="50 yo m with hx of dm2, copd",
            problem_clusters=[
                aug_lib.ProblemCluster(fragments=[
                    _fragment(ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                              29, 32, "dm2", "\n*. ", ": "),
                    _fragment(
                        ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION,
                        34, 44, "on insulin", "", ""),
                    _fragment(ap_parsing_lib.LabeledSpanType.ACTION_ITEM, 47,
                              51, "RISS", "\n- ", ""),
                ])
            ])

        expected_text = (
            "50 yo m with hx of dm2, copd\n*. dm2: on insulin\n- RISS")
        expected_spans = [
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                start_char=32,
                end_char=35),  # span_text="dm2"
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.
                PROBLEM_DESCRIPTION,
                start_char=37,
                end_char=47),  # span_text="on insulin"
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                start_char=50,
                end_char=54),  # span_text="RISS"
        ]
        result_ap_text, result_labeled_char_spans = structured_ap.compile()
        self.assertEqual((result_ap_text, result_labeled_char_spans),
                         (expected_text, expected_spans))
示例#12
0
def labeled_token_span_to_labeled_char_span(labeled_token_span, tokens):
    """Converts labeled spans from token to character level.

  Args:
    labeled_token_span: A token level span.
    tokens: Document tokens.

  Returns:
    LabeledCharSpan: Character level labeled span.
  """
    token_span = (labeled_token_span.start_token, labeled_token_span.end_token)
    start_char, end_char = token_span_to_char_span(tokens, token_span)
    # Span type and action item type carry over unchanged.
    return ap_parsing_lib.LabeledCharSpan(
        span_type=labeled_token_span.span_type,
        action_item_type=labeled_token_span.action_item_type,
        start_char=start_char,
        end_char=end_char)
示例#13
0
    def test_midword(self):
        """Spans cut mid-word are extended to full token boundaries."""
        text = "a&p:\n # COPD: on nebs "
        tokens = tokenizer_lib.tokenize(text)

        # Every input below normalizes to "COPD" at chars [8, 12).
        cases = [
            (6, 11),  # "# COP" - extend word boundary right.
            (9, 14),  # "OPD: " - extend word boundary left.
            (9, 11),  # "OP" - extend word boundary both directions.
        ]
        expected = ap_parsing_lib.LabeledCharSpan(
            start_char=8,
            end_char=12,
            span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)  # "COPD"
        for start_char, end_char in cases:
            labeled_char_span = ap_parsing_lib.LabeledCharSpan(
                start_char=start_char,
                end_char=end_char,
                span_type=ap_parsing_lib.LabeledSpanType.UNKNOWN_TYPE)
            self.assertEqual(
                ap_parsing_utils.normalize_labeled_char_span(
                    labeled_char_span, tokens), expected)
示例#14
0
    def test_process_rating_labels(self):
        """Rating labels are filtered/clipped against a note section."""

        def _title_span(start_char, end_char):
            return ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                start_char=start_char,
                end_char=end_char)

        rating_labels = [
            _title_span(0, 50),     # before
            _title_span(45, 65),    # partially contained
            _title_span(50, 150),   # exactly matches section
            _title_span(100, 105),  # contained
            _title_span(150, 155),  # after
        ]

        # Only the fully contained spans survive, offset to section start.
        expected = [
            _title_span(0, 100),
            _title_span(50, 55),
        ]
        self.assertEqual(
            data_lib.process_rating_labels(
                rating_labels, note_section_lib.Section(50, 150, [])),
            expected)
示例#15
0
    def test_usage(self):
        """Runs ApplyAugmentations end-to-end in a Beam test pipeline.

        The output is expected to contain the untouched input element plus
        one augmented copy (augmentation_name="test") whose text and span
        offsets reflect the rewritten problem-title delimiter.
        """
        # Single deterministic augmentation sequence: change problem-title
        # delimiters to "\n".
        augmentation_config = aug_lib.AugmentationConfig(
            augmentation_sequences=[
                aug_lib.AugmentationSequence(
                    name="test",
                    augmentation_sequence=[
                        aug_lib.ChangeDelimAugmentation(fragment_types=[
                            ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE
                        ],
                                                        delims=["\n"])
                    ])
            ],
            augmentation_number_deterministic=1)

        # Keyed input element: ("note_id|char_offset", APData).
        ap_data = [
            (
                "0|10",
                data_lib.APData(
                    note_id=0,
                    subject_id=0,
                    ap_text="a&p:\n # dm2:\n-RISS",
                    labeled_char_spans=[
                        ap_parsing_lib.LabeledCharSpan(
                            span_type=ap_parsing_lib.LabeledSpanType.
                            PROBLEM_TITLE,
                            start_char=8,
                            end_char=11),  # span_text="dm2",
                        ap_parsing_lib.LabeledCharSpan(
                            span_type=ap_parsing_lib.LabeledSpanType.
                            ACTION_ITEM,
                            action_item_type=ap_parsing_lib.ActionItemType.
                            MEDICATIONS,
                            start_char=14,
                            end_char=18)  # span_text="RISS",
                    ])),
        ]
        # Expected: original element first, then the augmented copy with
        # spans re-based onto the rewritten text "a&p\ndm2:\n- RISS".
        expected = [
            *ap_data,
            (
                "0|10",
                data_lib.APData(
                    note_id=0,
                    subject_id=0,
                    ap_text="a&p\ndm2:\n- RISS",
                    tokens=tokenizer_lib.tokenize("a&p\ndm2:\n- RISS"),
                    labeled_char_spans=[
                        ap_parsing_lib.LabeledCharSpan(
                            span_type=ap_parsing_lib.LabeledSpanType.
                            PROBLEM_TITLE,
                            start_char=4,
                            end_char=7),  # span_text="dm2",
                        ap_parsing_lib.LabeledCharSpan(
                            span_type=ap_parsing_lib.LabeledSpanType.
                            ACTION_ITEM,
                            action_item_type=ap_parsing_lib.ActionItemType.
                            MEDICATIONS,
                            start_char=11,
                            end_char=15)  # span_text="RISS",
                    ],
                    augmentation_name="test")),
        ]

        with test_pipeline.TestPipeline() as p:
            results = (p
                       | beam.Create(ap_data)
                       | beam.ParDo(data_lib.ApplyAugmentations(),
                                    augmentation_config))
            util.assert_that(results, util.equal_to(expected))
示例#16
0
 def setUp(self):
     """Builds a four-cluster StructuredAP fixture with empty delimiters."""
     super().setUp()

     def _fragment(span_type, start_char, end_char, text,
                   action_item_type=None):
         # Only set action_item_type when provided, mirroring constructor
         # calls that omit the field for titles and descriptions.
         span_kwargs = dict(
             span_type=span_type, start_char=start_char, end_char=end_char)
         if action_item_type is not None:
             span_kwargs["action_item_type"] = action_item_type
         return aug_lib.ProblemClusterFragment(
             labeled_char_span=ap_parsing_lib.LabeledCharSpan(**span_kwargs),
             text=text,
             prefix_delim="",
             suffix_delim="")

     # Short aliases keep the cluster definitions below readable.
     pt = ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE
     pd = ap_parsing_lib.LabeledSpanType.PROBLEM_DESCRIPTION
     ai = ap_parsing_lib.LabeledSpanType.ACTION_ITEM
     meds = ap_parsing_lib.ActionItemType.MEDICATIONS
     labs = ap_parsing_lib.ActionItemType.OBSERVATIONS_LABS
     consults = ap_parsing_lib.ActionItemType.CONSULTS

     self.problem_clusters = [
         aug_lib.ProblemCluster(fragments=[
             _fragment(pt, 29, 32, "dm2"),
             _fragment(pd, 34, 44, "on insulin"),
             _fragment(ai, 47, 51, "RISS", meds),
         ]),
         aug_lib.ProblemCluster(fragments=[
             _fragment(pt, 52, 58, "anemia"),
             _fragment(ai, 59, 64, "trend", labs),
         ]),
         aug_lib.ProblemCluster(fragments=[
             _fragment(pt, 65, 69, "COPD"),
             _fragment(ai, 70, 74, "nebs", meds),
         ]),
         aug_lib.ProblemCluster(fragments=[
             _fragment(pt, 75, 81, "sepsis"),
             _fragment(pd, 82, 93, "dd pna, uti"),
             _fragment(pd, 94, 117, "yesterday without fever"),
             _fragment(ai, 118, 127, "cont. abx", meds),
             _fragment(ai, 128, 131, "cis", labs),
             _fragment(ai, 132, 142, "id consult", consults),
         ]),
     ]
     self.ap = aug_lib.StructuredAP(problem_clusters=self.problem_clusters,
                                    prefix_text="")
示例#17
0
    def test_multiratings(self):
        """Two ratings of the same note yield two identical APData records."""
        section_markers = {
            "hpi": ["history of present illness"],
            "a&p": ["assessment and plan"],
        }
        ap_text = "a&p:\n # dm2:\n-RISS"

        def _span(span_type, start_char, end_char):
            return ap_parsing_lib.LabeledCharSpan(
                span_type=span_type, start_char=start_char, end_char=end_char)

        pt = ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE
        ai = ap_parsing_lib.LabeledSpanType.ACTION_ITEM
        notes_with_ratings = [("0", {
            "notes": [
                data_lib.Note(note_id=0,
                              text="blablabla\n" + ap_text,
                              subject_id=0,
                              category="PHYSICIAN")
            ],
            # Two raters with slightly different (but normalizable) spans.
            "ratings": [
                [_span(pt, 19, 22), _span(ai, 24, 28)],
                [_span(pt, 18, 22), _span(ai, 25, 28)],
            ],
            "note_partition": ["test", "test"]
        })]

        # Both ratings normalize to the same spans relative to the A&P
        # section (char_offset=10), hence the duplicated expected record.
        expected = [(
            "0|10",
            data_lib.APData(
                partition=data_lib.Partition.TEST,
                note_id="0",
                subject_id="0",
                ap_text=ap_text,
                char_offset=10,
                tokens=tokenizer_lib.tokenize(ap_text),
                labeled_char_spans=[_span(pt, 8, 11),
                                    _span(ai, 14, 18)]))] * 2
        with test_pipeline.TestPipeline() as p:
            results = (p
                       | beam.Create(notes_with_ratings)
                       | beam.ParDo(
                           data_lib.ProcessAPData(
                               filter_inorganic_threshold=0), section_markers))
            util.assert_that(results, util.equal_to(expected))
# --- Example 18 (dataset separator artifact) ---
    def test_usage(self):
        """Checks ProcessAPData on one rated note and one unrated note.

        The rated note (id 0) should land in the VAL partition with its
        rater-provided spans normalized; the unrated note (id 1) should be
        emitted as NONRATED with spans derived by the parser itself.
        """
        section_markers = {
            "hpi": ["history of present illness"],
            "a&p": ["assessment and plan"],
        }
        ap_texts = ["a&p:\n # dm2:\n-RISS", "a&p:\n # COPD:\n-nebs"]

        def make_note(idx):
            # Each note prepends a 10-char preamble ("blablabla\n"),
            # which is why char_offset == 10 in the expected outputs.
            return data_lib.Note(
                note_id=idx,
                text="blablabla\n" + ap_texts[idx],
                subject_id=idx,
                category="PHYSICIAN")

        rater_spans = [
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                start_char=19,
                end_char=22),
            ap_parsing_lib.LabeledCharSpan(
                span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS,
                start_char=24,
                end_char=28),
        ]
        notes_with_ratings = [
            ("0", {
                "notes": [make_note(0)],
                "ratings": [rater_spans],
                "note_partition": ["val"],
            }),
            ("1", {
                "notes": [make_note(1)],
                "ratings": [],
                "note_partition": [],
            }),
        ]

        rated_ap_data = data_lib.APData(
            partition=data_lib.Partition.VAL,
            note_id="0",
            subject_id="0",
            ap_text=ap_texts[0],
            char_offset=10,
            tokens=tokenizer_lib.tokenize(ap_texts[0]),
            labeled_char_spans=[
                ap_parsing_lib.LabeledCharSpan(
                    span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                    start_char=8,
                    end_char=11),
                ap_parsing_lib.LabeledCharSpan(
                    span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                    action_item_type=ap_parsing_lib.ActionItemType.MEDICATIONS,
                    start_char=14,
                    end_char=18),
            ])
        nonrated_ap_data = data_lib.APData(
            partition=data_lib.Partition.NONRATED,
            note_id="1",
            subject_id="1",
            ap_text=ap_texts[1],
            char_offset=10,
            tokens=tokenizer_lib.tokenize(ap_texts[1]),
            labeled_char_spans=[
                ap_parsing_lib.LabeledCharSpan(
                    span_type=ap_parsing_lib.LabeledSpanType.PROBLEM_TITLE,
                    start_char=8,
                    end_char=12),
                ap_parsing_lib.LabeledCharSpan(
                    span_type=ap_parsing_lib.LabeledSpanType.ACTION_ITEM,
                    start_char=15,
                    end_char=19),
            ])
        expected = [("0|10", rated_ap_data), ("1|10", nonrated_ap_data)]

        with test_pipeline.TestPipeline() as p:
            results = (p
                       | beam.Create(notes_with_ratings)
                       | beam.ParDo(
                           data_lib.ProcessAPData(filter_inorganic_threshold=0),
                           section_markers))
            util.assert_that(results, util.equal_to(expected))