示例#1
0
 def test_max_scope_none(self):
     """With ``max_scope=None`` the last token of the doc gets the section category."""
     sectionizer = Sectionizer(nlp, rules=None, max_scope=None)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     sectionizer.add(pmh_rule)
     doc = nlp("Past Medical History: This is the sentence.")
     sectionizer(doc)
     final_token = doc[-1]
     assert final_token._.section_category == "past_medical_history"
示例#2
0
 def test_parent_section_multiple_candidates(self):
     """A rule with several candidate parents attaches to the one that occurs.

     The ``explanation`` rule accepts ``past_medical_history`` or
     ``allergies`` as parent; only the former appears in the text.
     """
     sectionizer = Sectionizer(nlp, rules=None)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     explanation_rule = SectionRule(
         category="explanation",
         literal="Explanation:",
         parents=["past_medical_history", "allergies"])
     sectionizer.add([pmh_rule, explanation_rule])
     doc = nlp("Past Medical History: some other text. Explanation: The patient has one")
     sectionizer(doc)
     assert len(doc._.sections) == 2
     # Sections are checked in document order: header section first, child second.
     pmh_section, explanation_section = doc._.sections
     assert pmh_section.parent is None
     assert explanation_section.parent.category == "past_medical_history"
 def test_parent_section_chain(self):
     """In an s1 -> s2 -> s3 chain each section is parented by the previous one."""
     sectionizer = Sectionizer(nlp, rules=None)
     chain_rules = [
         SectionRule(category="s1", literal="section 1:"),
         SectionRule(category="s2", literal="section 2:", parents=["s1"]),
         SectionRule(category="s3", literal="section 3:", parents=["s2"]),
     ]
     sectionizer.add(chain_rules)
     doc = nlp("section 1: abc section 2: abc section 3: abc")
     sectionizer(doc)
     assert len(doc._.sections) == 3
     first, second, third = doc._.sections
     assert first.parent is None
     assert second.parent.category == "s1"
     assert third.parent.category == "s2"
 def test_parent_section_parent_required(self):
     """A ``parent_required`` rule whose parent is absent yields no categorized section.

     The text contains "Explanation:" but no "Past Medical History:" header,
     so the test expects a single section with neither category nor parent.
     """
     sectionizer = Sectionizer(nlp, rules=None)
     sectionizer.add([
         SectionRule(category="past_medical_history",
                     literal="Past Medical History:"),
         SectionRule(category="explanation",
                     literal="Explanation:",
                     parents=["past_medical_history"],
                     parent_required=True),
     ])
     text = "other text Explanation: The patient has one"
     doc = nlp(text)
     sectionizer(doc)
     assert len(doc._.sections) == 1
     section = doc._.sections[0]
     # The leftover debug print was dropped: it verified nothing and only
     # added noise to the test output.
     assert section.category is None
     assert section.parent is None
 def test_end_line(self):
     """With ``require_end_line=True`` the sample text splits into exactly two sections.

     The literal occurs twice: once followed by a newline and once
     mid-sentence. Presumably only the first is accepted as a header.
     """
     sectionizer = Sectionizer(nlp, rules=None, require_end_line=True)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     sectionizer.add(pmh_rule)
     doc = nlp("\n\n Past Medical History:\n The patient has a Past Medical History: this")
     sectionizer(doc)
     section_count = len(doc._.sections)
     assert section_count == 2
 def test_parent_section_no_valid_parent(self):
     """``explanation`` gets no parent when it follows a section outside its parents list.

     "Explanation:" comes directly after "Allergies:", and ``allergies`` is
     not among the rule's parents, so all three sections end up parentless.
     """
     sectionizer = Sectionizer(nlp, rules=None)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     allergies_rule = SectionRule(category="allergies", literal="Allergies:")
     explanation_rule = SectionRule(category="explanation",
                                    literal="Explanation:",
                                    parents=["past_medical_history"])
     sectionizer.add([pmh_rule, allergies_rule, explanation_rule])
     doc = nlp("Past Medical History: some other text. Allergies: peanuts Explanation: pt cannot eat peanuts")
     sectionizer(doc)
     assert len(doc._.sections) == 3
     pmh_section, allergies_section, explanation_section = doc._.sections
     assert pmh_section.parent is None
     assert allergies_section.parent is None
     assert explanation_section.parent is None
 def test_max_scope(self):
     """With ``max_scope=2`` only body tokens near the header keep the category."""
     sectionizer = Sectionizer(nlp, rules=None, max_scope=2)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     sectionizer.add(pmh_rule)
     doc = nlp("Past Medical History: This is the sentence.")
     sectionizer(doc)
     body = doc._.sections[0].body_span
     in_scope_token = body[0]
     assert in_scope_token._.section_category == "past_medical_history"
     # The fourth body token lies beyond the two-token scope.
     out_of_scope_token = body[3]
     assert out_of_scope_token._.section_category is None
 def test_num_sections(self):
     """Processing the same text twice yields one section each time."""
     sectionizer = Sectionizer(nlp, rules=None)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     sectionizer.add(pmh_rule)
     first_doc = nlp("Past Medical History: PE")
     sectionizer(first_doc)
     assert len(first_doc._.sections) == 1
     # A second pass over a fresh doc must not accumulate sections.
     second_doc = nlp("Past Medical History: PE")
     sectionizer(second_doc)
     assert len(second_doc._.sections) == 1
 def test_parent_section_chain_backtracking_interrupted(self):
     """A parentless section between s3 and s4 leaves s4 without a parent.

     s4 lists s1 as parent, but the "section break:" header (a rule with no
     parents) sits between the s1 -> s2 -> s3 chain and s4.
     """
     sectionizer = Sectionizer(nlp, rules=None)
     rules = [
         SectionRule(category="s1", literal="section 1:"),
         SectionRule(category="s2", literal="section 2:", parents=["s1"]),
         SectionRule(category="s3", literal="section 3:", parents=["s2"]),
         SectionRule(category="s4", literal="section 4:", parents=["s1"]),
         SectionRule(category="break", literal="section break:"),
     ]
     sectionizer.add(rules)
     doc = nlp("section 1: abc section 2: abc section 3: abc section break: abc section 4: abc")
     sectionizer(doc)
     assert len(doc._.sections) == 5
     # Index 3 is the "break" section; it is not inspected here.
     first, second, third, _break_section, fourth = doc._.sections
     assert first.parent is None
     assert second.parent.category == "s1"
     assert third.parent.category == "s2"
     assert fourth.parent is None
示例#10
0
 def test_max_scope_rule(self):
     """A rule's own ``max_scope`` (100) applies despite the sectionizer's ``max_scope=2``.

     The last token of the doc must still belong to the section.
     """
     sectionizer = Sectionizer(nlp, rules=None, max_scope=2)
     wide_rule = SectionRule(category="past_medical_history",
                             literal="Past Medical History:",
                             max_scope=100)
     sectionizer.add(wide_rule)
     doc = nlp("Past Medical History: This is the sentence.")
     sectionizer(doc)
     last_section = doc._.sections[-1]
     last_token = doc[-1]
     assert last_section.category == "past_medical_history"
     assert last_token in last_section.section_span
     assert last_token._.section_category == "past_medical_history"
示例#11
0
    def test_doc_attributes(self):
        """Every section-related ``Doc`` extension is non-empty after sectionizing."""
        sectionizer = Sectionizer(nlp, rules=None)
        pmh_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:")
        sectionizer.add(pmh_rule)
        doc = nlp("Past Medical History: PE")
        sectionizer(doc)

        doc_extensions = (
            "sections",
            "section_categories",
            "section_titles",
            "section_spans",
            "section_bodies",
        )
        for extension_name in doc_extensions:
            assert len(getattr(doc._, extension_name))
示例#12
0
 def test_parent_section_duplicate_sections_different_parents(self):
     """Two ``explanation`` sections each attach to the header that precedes them.

     The first "Explanation:" follows "Past Medical History:" and the second
     follows "Allergies:"; both are permitted parents of the rule.
     """
     sectionizer = Sectionizer(nlp, rules=None)
     pmh_rule = SectionRule(category="past_medical_history",
                            literal="Past Medical History:")
     allergies_rule = SectionRule(category="allergies", literal="Allergies:")
     explanation_rule = SectionRule(
         category="explanation",
         literal="Explanation:",
         parents=["past_medical_history", "allergies"])
     sectionizer.add([pmh_rule, allergies_rule, explanation_rule])
     doc = nlp("Past Medical History: some other text. Explanation: The patient has one. Allergies: peanuts Explanation: pt cannot eat peanuts")
     sectionizer(doc)
     assert len(doc._.sections) == 4
     pmh_section, first_explanation, allergies_section, second_explanation = doc._.sections
     assert pmh_section.parent is None
     assert first_explanation.parent.category == "past_medical_history"
     assert allergies_section.parent is None
     assert second_explanation.parent.category == "allergies"
示例#13
0
 def test_context_attributes(self):
     """``add_attrs`` sets ``is_negated`` on an entity inside the mapped section."""
     from spacy.tokens import Span

     section_attrs = {"past_medical_history": {"is_negated": True}}
     sectionizer = Sectionizer(nlp, rules=None, add_attrs=section_attrs)
     sectionizer.add(
         [SectionRule("Past Medical History:", "past_medical_history")])
     doc = nlp("Past Medical History: Pneumonia")
     # Token 4 is "Pneumonia", the only token after the header.
     pneumonia = Span(doc, 4, 5)
     doc.ents = (pneumonia, )
     sectionizer(doc)
     first_entity = doc.ents[0]
     assert first_entity._.is_negated is True
示例#14
0
    def test_section(self):
        """The single detected section exposes category, spans, parent and rule."""
        sectionizer = Sectionizer(nlp, rules=None)
        pmh_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:")
        sectionizer.add(pmh_rule)
        doc = nlp("Past Medical History: PE")
        sectionizer(doc)

        pmh_section = doc._.sections[0]
        assert pmh_section.category == "past_medical_history"
        # Title is everything but the last token; body is the last token only.
        assert pmh_section.section_span == doc[0:]
        assert pmh_section.title_span == doc[0:-1]
        assert pmh_section.body_span == doc[-1:]
        assert pmh_section.parent is None
        assert pmh_section.rule is pmh_rule
示例#15
0
    def test_span_attributes(self):
        """A span in the section body exposes the section through its extensions."""
        sectionizer = Sectionizer(nlp, rules=None)
        pmh_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:")
        sectionizer.add(pmh_rule)
        doc = nlp("Past Medical History: PE")
        sectionizer(doc)

        body_span = doc[-1:]
        span_ext = body_span._
        assert span_ext.section is doc._.sections[0]
        assert span_ext.section_category == "past_medical_history"
        assert span_ext.section_span == doc[0:]
        assert span_ext.section_title == doc[0:-1]
        assert span_ext.section_body == doc[-1:]
        assert span_ext.section_parent is None
        assert span_ext.section_rule is pmh_rule
示例#16
0
    def test_span_attributes(self):
        """Every section-related extension on the last token has non-zero length."""
        sectionizer = Sectionizer(nlp, rules=None)
        pmh_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:")
        sectionizer.add(pmh_rule)
        doc = nlp("Past Medical History: PE")
        sectionizer(doc)

        last_token = doc[-1]

        token_extensions = (
            "section",
            "section_category",
            "section_title",
            "section_span",
            "section_body",
            "section_rule",
        )
        for extension_name in token_extensions:
            assert len(getattr(last_token._, extension_name))
示例#17
0
    def test_document_starts_no_header(self):
        """Text before the first header becomes an uncategorized, untitled section."""
        sectionizer = Sectionizer(nlp, rules=None)
        pmh_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:")
        sectionizer.add(pmh_rule)
        doc = nlp("This is separate. Past Medical History: PE")
        sectionizer(doc)
        assert len(doc._.sections) == 2

        # Leading text that precedes any header.
        leading_section = doc._.sections[0]
        assert leading_section.category is None
        assert leading_section.title_span.text == ""
        assert leading_section.body_span.text == "This is separate."

        # The section opened by the matched header.
        pmh_section = doc._.sections[1]
        assert pmh_section.category == "past_medical_history"
        assert pmh_section.title_span.text == "Past Medical History:"
        assert pmh_section.section_span.text == "Past Medical History: PE"
示例#18
0
 def test_initialize(self):
     """A ``SectionRule`` built from two positional strings is truthy."""
     rule = SectionRule("title", "literal")
     assert rule
示例#19
0
 def test_read_json(self):
     """``SectionRule.from_json`` returns a non-empty collection of rules.

     Each loaded item must be both a ``SectionRule`` and a ``BaseRule``.
     """
     loaded_rules = SectionRule.from_json("resources/section_patterns.json")
     assert loaded_rules
     for loaded_rule in loaded_rules:
         assert isinstance(loaded_rule, SectionRule)
         assert isinstance(loaded_rule, BaseRule)
示例#20
0
 def test_max_scope(self):
     """The ``max_scope`` passed to ``SectionRule`` is exposed as an attribute."""
     scoped_rule = SectionRule(category="past_medical_history",
                               literal="Past Medical History:",
                               max_scope=100)
     assert scoped_rule.max_scope == 100
示例#21
0
 def test_section_categories(self):
     """``section_categories`` lists the category of the single added rule."""
     sectionizer = Sectionizer(nlp, rules=None)
     pmh_rule = SectionRule("Past Medical History:", "past_medical_history")
     sectionizer.add([pmh_rule])
     categories = sectionizer.section_categories
     assert categories == ["past_medical_history"]
示例#22
0
from medspacy.section_detection import SectionRule

section_rules = [
    SectionRule(category="labs", literal="Lab results:"),
    SectionRule(category="addendum", literal="ADDENDUM:"),
    SectionRule(category="addendum", literal="Addendum:"),
    SectionRule(category="allergies", literal="ALLERGIC REACTIONS:"),
    SectionRule(category="allergies", literal="ALLERGIES:"),
    SectionRule(category="chief_complaint", literal="CC:"),
    SectionRule(category="chief_complaint", literal="CHIEF COMPLAINT:"),
    SectionRule(category="chief_complaint", literal="Chief Complaint:"),
    SectionRule(category="comments", literal="COMMENTS:"),
    SectionRule(category="diagnoses", literal="ADMISSION DIAGNOSES:"),
    SectionRule(category="diagnoses", literal="DIAGNOSES:"),
    SectionRule(category="diagnoses", literal="Diagnosis:"),
    SectionRule(category="diagnoses", literal="Primary Diagnosis:"),
    SectionRule(category="diagnoses", literal="Primary:"),
    SectionRule(category="diagnoses", literal="SECONDARY DIAGNOSES:"),
    SectionRule(category="diagnoses", literal="Secondary Diagnoses:"),
    SectionRule(category="diagnoses", literal="Secondary Diagnosis:"),
    SectionRule(category="diagnoses", literal="Secondary:"),
    SectionRule(category="family_history", literal="Family History:"),
    SectionRule(category="hospital_course", literal="Brief Hospital Course:"),
    SectionRule(category="hospital_course", literal="CONCISE SUMMARY OF HOSPITAL COURSE BY ISSUE/SYSTEM:"),
    SectionRule(category="hospital_course", literal="HOSPITAL COURSE:"),
    SectionRule(category="hospital_course", literal="SUMMARY OF HOSPITAL COURSE:"),
    SectionRule(category="imaging", literal="IMAGING:"),
    SectionRule(category="imaging", literal="INTERPRETATION:"),
    SectionRule(category="imaging", literal="Imaging:"),
    SectionRule(category="imaging", literal="MRI:"),
    SectionRule(category="imaging", literal="Radiology:"),
示例#23
0
    ALLOWED_DATA_TYPES,
)
from medspacy.context import ConTextComponent
from medspacy.section_detection import Sectionizer, SectionRule

# Shared pipeline for this module: the "en_core_web_sm" model with its
# "ner" pipe removed.
nlp = spacy.load("en_core_web_sm")
nlp.remove_pipe("ner")

# entity_ruler pipe with a single pattern: "cough" is labelled PROBLEM.
matcher = nlp.add_pipe("entity_ruler")
matcher.add_patterns([{"label": "PROBLEM", "pattern": "cough"}])

# Added by factory name; the returned component is not kept.
nlp.add_pipe("medspacy_context")

# Sectionizer pipe with two rules; "section2" names "section1" as its parent.
sectionizer = nlp.add_pipe("medspacy_sectionizer")
sectionizer.add([
    SectionRule("Section 1:", "section1"),
    SectionRule("Section 2:", "section2", parents=["section1"]),
])

# Sample texts that are run through the pipeline below.
simple_text = "Patient has a cough."
context_text = "Patient has no cough."
section_text = "Section 1: Patient has a cough"
section_parent_text = """Section 1: comment
Section 2: Patient has a cough"""
# "cough " repeated i times for i in 0..9, so the first text is empty.
many_concept_texts = ["cough " * i for i in range(10)]

# Documents built once at import time, one per sample text above.
simple_doc = nlp(simple_text)
context_doc = nlp(context_text)
section_doc = nlp(section_text)
section_parent_doc = nlp(section_parent_text)
many_concept_docs = [nlp(t) for t in many_concept_texts]