def test_max_scope_none(self) -> None:
    """With ``max_scope=None`` the final token still reports the section category."""
    sectionizer = Sectionizer(nlp, rules=None, max_scope=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("Past Medical History: This is the sentence.")
    sectionizer(doc)

    last_token = doc[-1]
    assert last_token._.section_category == "past_medical_history"
def test_parent_section_multiple_candidates(self) -> None:
    """A rule listing two candidate parents is attached to the one present in the text.

    ``explanation`` allows ``past_medical_history`` or ``allergies`` as parent;
    only the former appears in the text, and the test expects it to be chosen.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    rules = [
        SectionRule(category="past_medical_history", literal="Past Medical History:"),
        SectionRule(
            category="explanation",
            literal="Explanation:",
            parents=["past_medical_history", "allergies"],
        ),
    ]
    sectionizer.add(rules)

    text = "Past Medical History: some other text. Explanation: The patient has one"
    doc = nlp(text)
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 2

    pmh, explanation = sections[0], sections[1]
    assert pmh.parent is None
    assert explanation.parent.category == "past_medical_history"
def test_parent_section_chain(self) -> None:
    """Three rules chained s1 -> s2 -> s3 each resolve to the preceding section as parent."""
    sectionizer = Sectionizer(nlp, rules=None)
    chained_rules = [
        SectionRule(category="s1", literal="section 1:"),
        SectionRule(category="s2", literal="section 2:", parents=["s1"]),
        SectionRule(category="s3", literal="section 3:", parents=["s2"]),
    ]
    sectionizer.add(chained_rules)

    doc = nlp("section 1: abc section 2: abc section 3: abc")
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 3

    first, second, third = sections[0], sections[1], sections[2]
    assert first.parent is None
    assert second.parent.category == "s1"
    assert third.parent.category == "s2"
def test_parent_section_parent_required(self) -> None:
    """A ``parent_required=True`` rule produces no section of its own when its parent is absent.

    The text contains the "Explanation:" header but no "Past Medical History:"
    header, so the test expects a single section with neither category nor
    parent.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    sectionizer.add([
        SectionRule(category="past_medical_history", literal="Past Medical History:"),
        SectionRule(
            category="explanation",
            literal="Explanation:",
            parents=["past_medical_history"],
            parent_required=True,
        ),
    ])

    text = "other text Explanation: The patient has one"
    doc = nlp(text)
    sectionizer(doc)

    assert len(doc._.sections) == 1

    # The leftover debugging ``print(section)`` was dropped: it only added
    # noise to the test output and asserted nothing.
    section = doc._.sections[0]
    assert section.category is None
    assert section.parent is None
def test_end_line(self) -> None:
    """With ``require_end_line=True`` the sample text is expected to yield two sections.

    The header literal occurs twice in the text: once followed by a newline
    and once in the middle of a sentence.
    """
    sectionizer = Sectionizer(nlp, rules=None, require_end_line=True)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("\n\n Past Medical History:\n The patient has a Past Medical History: this")
    sectionizer(doc)

    found = doc._.sections
    assert len(found) == 2
def test_parent_section_no_valid_parent(self) -> None:
    """``explanation`` gets no parent when ``allergies`` sits between it and its only candidate.

    The ``explanation`` rule only accepts ``past_medical_history`` as parent,
    but in the text an ``allergies`` section comes directly before it.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    rules = [
        SectionRule(category="past_medical_history", literal="Past Medical History:"),
        SectionRule(category="allergies", literal="Allergies:"),
        SectionRule(
            category="explanation",
            literal="Explanation:",
            parents=["past_medical_history"],
        ),
    ]
    sectionizer.add(rules)

    text = "Past Medical History: some other text. Allergies: peanuts Explanation: pt cannot eat peanuts"
    doc = nlp(text)
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 3

    # None of the three sections is expected to have a parent.
    pmh, allergies, explanation = sections[0], sections[1], sections[2]
    assert pmh.parent is None
    assert allergies.parent is None
    assert explanation.parent is None
def test_max_scope(self) -> None:
    """With ``max_scope=2`` only tokens within the scope carry the section category."""
    sectionizer = Sectionizer(nlp, rules=None, max_scope=2)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("Past Medical History: This is the sentence.")
    sectionizer(doc)

    body = doc._.sections[0].body_span
    in_scope_token = body[0]
    assert in_scope_token._.section_category == "past_medical_history"

    # This should be out of range of the section scope
    out_of_scope_token = body[3]
    assert out_of_scope_token._.section_category is None
def test_num_sections(self) -> None:
    """Running the same sectionizer on two fresh docs yields one section each time."""
    sectionizer = Sectionizer(nlp, rules=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    first_doc = nlp("Past Medical History: PE")
    sectionizer(first_doc)
    assert len(first_doc._.sections) == 1

    # Now reprocess and make sure it resets
    second_doc = nlp("Past Medical History: PE")
    sectionizer(second_doc)
    assert len(second_doc._.sections) == 1
def test_parent_section_chain_backtracking_interrupted(self) -> None:
    """``s4`` (parent ``s1``) is expected to have no parent after an unrelated ``break`` section.

    The text runs s1, s2, s3, break, s4. The first three are expected to chain
    normally; the section at index 3 (``break``) is not inspected.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    rules = [
        SectionRule(category="s1", literal="section 1:"),
        SectionRule(category="s2", literal="section 2:", parents=["s1"]),
        SectionRule(category="s3", literal="section 3:", parents=["s2"]),
        SectionRule(category="s4", literal="section 4:", parents=["s1"]),
        SectionRule(category="break", literal="section break:"),
    ]
    sectionizer.add(rules)

    text = "section 1: abc section 2: abc section 3: abc section break: abc section 4: abc"
    doc = nlp(text)
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 5

    first, second, third = sections[0], sections[1], sections[2]
    fourth = sections[4]
    assert first.parent is None
    assert second.parent.category == "s1"
    assert third.parent.category == "s2"
    assert fourth.parent is None
def test_max_scope_rule(self) -> None:
    """A rule with ``max_scope=100`` covers the last token although the sectionizer uses ``max_scope=2``."""
    sectionizer = Sectionizer(nlp, rules=None, max_scope=2)
    wide_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
        max_scope=100,
    )
    sectionizer.add(wide_rule)

    doc = nlp("Past Medical History: This is the sentence.")
    sectionizer(doc)

    last_section = doc._.sections[-1]
    last_token = doc[-1]
    assert last_section.category == "past_medical_history"
    assert last_token in last_section.section_span
    assert last_token._.section_category == "past_medical_history"
def test_doc_attributes(self) -> None:
    """Each section-related ``doc._`` attribute is non-empty after sectionizing."""
    sectionizer = Sectionizer(nlp, rules=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("Past Medical History: PE")
    sectionizer(doc)

    ext = doc._
    assert len(ext.sections)
    assert len(ext.section_categories)
    assert len(ext.section_titles)
    assert len(ext.section_spans)
    assert len(ext.section_bodies)
def test_parent_section_duplicate_sections_different_parents(self) -> None:
    """Two ``explanation`` sections each attach to the section that precedes them.

    The first follows ``past_medical_history`` and the second follows
    ``allergies``; both categories are listed as allowed parents.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    rules = [
        SectionRule(category="past_medical_history", literal="Past Medical History:"),
        SectionRule(category="allergies", literal="Allergies:"),
        SectionRule(
            category="explanation",
            literal="Explanation:",
            parents=["past_medical_history", "allergies"],
        ),
    ]
    sectionizer.add(rules)

    text = "Past Medical History: some other text. Explanation: The patient has one. Allergies: peanuts Explanation: pt cannot eat peanuts"
    doc = nlp(text)
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 4

    pmh, first_explanation = sections[0], sections[1]
    allergies, second_explanation = sections[2], sections[3]
    assert pmh.parent is None
    assert first_explanation.parent.category == "past_medical_history"
    assert allergies.parent is None
    assert second_explanation.parent.category == "allergies"
def test_context_attributes(self) -> None:
    """An entity inside a section listed in ``add_attrs`` gets ``is_negated`` set to True."""
    from spacy.tokens import Span

    attrs_by_category = {"past_medical_history": {"is_negated": True}}
    sectionizer = Sectionizer(nlp, rules=None, add_attrs=attrs_by_category)
    sectionizer.add(
        [SectionRule("Past Medical History:", "past_medical_history")]
    )

    doc = nlp("Past Medical History: Pneumonia")
    # Register the token at index 4 as the document's only entity.
    entity = Span(doc, 4, 5)
    doc.ents = (entity,)
    sectionizer(doc)

    assert doc.ents[0]._.is_negated is True
def test_section(self) -> None:
    """The first section exposes the expected category, spans, parent and originating rule."""
    sectionizer = Sectionizer(nlp, rules=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("Past Medical History: PE")
    sectionizer(doc)

    found = doc._.sections[0]
    assert found.category == "past_medical_history"
    # The title is everything but the last token; the body is the last token.
    assert found.section_span == doc[0:]
    assert found.title_span == doc[0:-1]
    assert found.body_span == doc[-1:]
    assert found.parent is None
    assert found.rule is pmh_rule
def test_span_attributes(self) -> None:
    """The section extension attributes on a span match the section it falls in."""
    sectionizer = Sectionizer(nlp, rules=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("Past Medical History: PE")
    sectionizer(doc)

    # A one-token span over the last token of the doc.
    body_span = doc[-1:]
    ext = body_span._
    assert ext.section is doc._.sections[0]
    assert ext.section_category == "past_medical_history"
    assert ext.section_span == doc[0:]
    assert ext.section_title == doc[0:-1]
    assert ext.section_body == doc[-1:]
    assert ext.section_parent is None
    assert ext.section_rule is pmh_rule
def test_token_attributes(self) -> None:
    """Each section extension attribute on a single token is non-empty.

    Renamed from ``test_span_attributes``: this test inspects a token
    (``doc[-1]``), and its old name collided with another test of the same
    name in this file. When two methods in one class share a name, the later
    definition replaces the earlier one, so only one of them would run.
    """
    sectionizer = Sectionizer(nlp, rules=None)
    sectionizer.add(
        SectionRule(
            category="past_medical_history",
            literal="Past Medical History:",
        )
    )

    doc = nlp("Past Medical History: PE")
    sectionizer(doc)

    token = doc[-1]
    assert len(token._.section)
    assert len(token._.section_category)
    assert len(token._.section_title)
    assert len(token._.section_span)
    assert len(token._.section_body)
    assert len(token._.section_rule)
def test_document_starts_no_header(self) -> None:
    """Text before the first header forms its own section with no category and an empty title."""
    sectionizer = Sectionizer(nlp, rules=None)
    pmh_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
    )
    sectionizer.add(pmh_rule)

    doc = nlp("This is separate. Past Medical History: PE")
    sectionizer(doc)

    sections = doc._.sections
    assert len(sections) == 2

    # Leading text that precedes any header.
    untitled = sections[0]
    assert untitled.category is None
    assert untitled.title_span.text == ""
    assert untitled.body_span.text == "This is separate."

    # The section introduced by the header.
    pmh = sections[1]
    assert pmh.category == "past_medical_history"
    assert pmh.title_span.text == "Past Medical History:"
    assert pmh.section_span.text == "Past Medical History: PE"
def test_initialize(self) -> None:
    """A ``SectionRule`` built from two positional strings is truthy."""
    rule = SectionRule("title", "literal")
    assert rule
def test_read_json(self) -> None:
    """``SectionRule.from_json`` returns a non-empty collection of ``SectionRule``/``BaseRule`` objects."""
    loaded = SectionRule.from_json("resources/section_patterns.json")
    assert loaded

    for entry in loaded:
        assert isinstance(entry, SectionRule)
        assert isinstance(entry, BaseRule)
def test_max_scope(self) -> None:
    """The ``max_scope`` passed to ``SectionRule`` is readable back from the rule."""
    scoped_rule = SectionRule(
        category="past_medical_history",
        literal="Past Medical History:",
        max_scope=100,
    )
    assert scoped_rule.max_scope == 100
def test_section_categories(self) -> None:
    """``section_categories`` lists the category of the single added rule."""
    sectionizer = Sectionizer(nlp, rules=None)
    rules = [SectionRule("Past Medical History:", "past_medical_history")]
    sectionizer.add(rules)

    expected = ["past_medical_history"]
    assert sectionizer.section_categories == expected
from medspacy.section_detection import SectionRule section_rules = [ SectionRule(category="labs", literal="Lab results:"), SectionRule(category="addendum", literal="ADDENDUM:"), SectionRule(category="addendum", literal="Addendum:"), SectionRule(category="allergies", literal="ALLERGIC REACTIONS:"), SectionRule(category="allergies", literal="ALLERGIES:"), SectionRule(category="chief_complaint", literal="CC:"), SectionRule(category="chief_complaint", literal="CHIEF COMPLAINT:"), SectionRule(category="chief_complaint", literal="Chief Complaint:"), SectionRule(category="comments", literal="COMMENTS:"), SectionRule(category="diagnoses", literal="ADMISSION DIAGNOSES:"), SectionRule(category="diagnoses", literal="DIAGNOSES:"), SectionRule(category="diagnoses", literal="Diagnosis:"), SectionRule(category="diagnoses", literal="Primary Diagnosis:"), SectionRule(category="diagnoses", literal="Primary:"), SectionRule(category="diagnoses", literal="SECONDARY DIAGNOSES:"), SectionRule(category="diagnoses", literal="Secondary Diagnoses:"), SectionRule(category="diagnoses", literal="Secondary Diagnosis:"), SectionRule(category="diagnoses", literal="Secondary:"), SectionRule(category="family_history", literal="Family History:"), SectionRule(category="hospital_course", literal="Brief Hospital Course:"), SectionRule(category="hospital_course", literal="CONCISE SUMMARY OF HOSPITAL COURSE BY ISSUE/SYSTEM:"), SectionRule(category="hospital_course", literal="HOSPITAL COURSE:"), SectionRule(category="hospital_course", literal="SUMMARY OF HOSPITAL COURSE:"), SectionRule(category="imaging", literal="IMAGING:"), SectionRule(category="imaging", literal="INTERPRETATION:"), SectionRule(category="imaging", literal="Imaging:"), SectionRule(category="imaging", literal="MRI:"), SectionRule(category="imaging", literal="Radiology:"),
ALLOWED_DATA_TYPES, ) from medspacy.context import ConTextComponent from medspacy.section_detection import Sectionizer, SectionRule nlp = spacy.load("en_core_web_sm") nlp.remove_pipe("ner") matcher = nlp.add_pipe("entity_ruler") matcher.add_patterns([{"label": "PROBLEM", "pattern": "cough"}]) nlp.add_pipe("medspacy_context") sectionizer = nlp.add_pipe("medspacy_sectionizer") sectionizer.add([ SectionRule("Section 1:", "section1"), SectionRule("Section 2:", "section2", parents=["section1"]), ]) simple_text = "Patient has a cough." context_text = "Patient has no cough." section_text = "Section 1: Patient has a cough" section_parent_text = """Section 1: comment Section 2: Patient has a cough""" many_concept_texts = ["cough " * i for i in range(10)] simple_doc = nlp(simple_text) context_doc = nlp(context_text) section_doc = nlp(section_text) section_parent_doc = nlp(section_parent_text) many_concept_docs = [nlp(t) for t in many_concept_texts]