def analyzeReport(self, report):
    """given an individual radiology report, creates a pyConTextGraph
    object that contains the context markup

    report: a text string containing the radiology report
    returns the populated ConTextDocument
    """
    context = pyConText.ConTextDocument()
    target_items = self.targets
    modifier_items = self.modifiers
    # split the report into sentences; markup is applied per sentence
    for sentence in helpers.sentenceSplitter().splitSentences(report):
        markup = pyConText.ConTextMarkup()
        markup.setRawText(sentence)
        markup.cleanText()
        markup.markItems(modifier_items, mode="modifier")
        markup.markItems(target_items, mode="target")
        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        # apply modifiers to any targets within each modifier's scope
        markup.applyModifiers()
        context.addMarkup(markup)
    context.computeDocumentGraph(verbose=True)
    return context
def analyzeReport(self, report, mode, modFilters=None):
    """given an individual radiology report, creates a pyConTextSql
    object that contains the context markup

    report: a text string containing the radiology report
    mode: which of the pyConText objects are we using (e.g. disease);
        selects the context/targets/modifiers sets from self
    modFilters: modifier categories to apply; None selects the default set
    """
    context = self.context.get(mode)
    targets = self.targets.get(mode)
    modifiers = self.modifiers.get(mode)
    # fix: compare to None with identity (`is`), not equality (`==`)
    if modFilters is None:
        modFilters = ['indication', 'pseudoneg',
                      'probable_negated_existence',
                      'definite_negated_existence',
                      'probable_existence', 'definite_existence',
                      'historical']
    context.reset()
    # NOTE(review): the report is passed to the splitter constructor here,
    # unlike sibling blocks that call splitSentences(report) — confirm this
    # matches the helpers API version in use
    sentences = helpers.sentenceSplitter(report)
    count = 0
    for s in sentences:
        context.setTxt(s)
        context.markItems(modifiers, mode="modifier")
        context.markItems(targets, mode="target")
        context.pruneMarks()
        context.dropMarks('Exclusion')
        context.applyModifiers()
        #context.pruneModifierRelationships()
        context.dropInactiveModifiers()
        context.commit()
        count += 1
def analyzeReport(self,csv,eHOST, idName,report, modFilters = ['indication','pseudoneg','probable_negated_existence', 'definite_negated_existence', 'probable_existence', 'definite_existence','future', 'historical', 'cigarette_units', 'frequency', 'amount', 'current', 'past', 'cessation', "initiation","pack_year", ] ): """given an individual radiology report, creates a pyConTextSql object that contains the context markup report: a text string containing the radiology reports mode: which of the pyConText objects are we using: disease modFilters: """ self.context = pyConText.ConTextDocument() targets=self.targets modifiers = self.modifiers if modFilters == None : modFilters = ['indication','pseudoneg','probable_negated_existence', 'definite_negated_existence', 'probable_existence', 'definite_existence', 'future', 'historical', 'cigarette_units', 'frequency', 'amount', 'current', 'past', 'cessation', "initiation","pack_year", ] fo=open(os.getcwd()+"/eHOST_FILES/corpus/%s"%idName, "w") fo.write(report.strip()) fo.close() splitter = helpers.sentenceSplitter() sentences = splitter.splitSentences(report) count = 0 for s in sentences: markup=pyConText.ConTextMarkup() markup.setRawText(s) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") markup.pruneMarks() markup.applyModifiers() markup.dropInactiveModifiers() count += 1 self.context.addMarkup(markup) idName, sevFlag, htmlStr = html.mark_document_with_html(csv, eHOST, idName,self.context) self.outString+= self.context.getXML()+u"\n" print self.context.getXML()#;raw_input() return idName, sevFlag, htmlStr
def setUp(self): # create a sample image in memory self.context = pyConText.ConTextMarkup() self.splitter = helpers.sentenceSplitter() self.su1 = u"kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?" self.su2 = u"IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM." self.su3 = u"This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence." self.su4 = u"This is a sentence with a numeric value equal to 1.43 and should not be split into two parts." self.items = [ [u"pulmonary embolism", u"PULMONARY_EMBOLISM", ur"""pulmonary\s(artery )?(embol[a-z]+)""", ""], ["no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward"], ]
def setUp(self): # create a sample image in memory self.context = pyConText.ConTextMarkup() self.splitter = helpers.sentenceSplitter() self.su1 = u'kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?' self.su2 = u'IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.' self.su3 = u'This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence.' self.su4 = u'This is a sentence with a numeric value equal to 1.43 and should not be split into two parts.' self.items = [[ u"pulmonary embolism", u"PULMONARY_EMBOLISM", ur"""pulmonary\s(artery )?(embol[a-z]+)""", "" ], [ "no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward" ]]
def analyzeReport(self, report ): """ given an individual radiology report, creates a pyConTextGraph object that contains the context markup report: a text string containing the radiology reports """ context = self.document targets = self.targets modifiers = self.modifiers splitter = helpers.sentenceSplitter() # alternatively you can skip the default exceptions and add your own # splitter = helpers.sentenceSpliter(useDefaults = False) #splitter.addExceptionTerms("Dr.","Mr.","Mrs.",M.D.","R.N.","L.P.N.",addCaseVariants=True) splitter.addExceptionTerms("Ms.","D.O.",addCaseVariants=True) splitter.deleteExceptionTerms("A.B.","B.S.",deleteCaseVariants=True) sentences = splitter.splitSentences(report) count = 0 for s in sentences: #print s markup = pyConText.ConTextMarkup() markup.toggleVerbose() markup.setRawText(s) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") #raw_input('marked targets and modifiers') #print "markup before pruning" #print markup.getXML() markup.pruneMarks() markup.dropMarks('Exclusion') # apply modifiers to any targets within the modifiers scope markup.applyModifiers() markup.pruneSelfModifyingRelationships() #context.pruneModifierRelationships() #context.dropInactiveModifiers() # add markup to context document print markup context.addMarkup(markup) count += 1 context.computeDocumentGraph()
def analyzeReport(self, report): """ given an individual radiology report, creates a pyConTextGraph object that contains the context markup report: a text string containing the radiology reports """ context = self.document targets = self.targets modifiers = self.modifiers splitter = helpers.sentenceSplitter() # alternatively you can skip the default exceptions and add your own # splitter = helpers.sentenceSpliter(useDefaults = False) #splitter.addExceptionTerms("Dr.","Mr.","Mrs.",M.D.","R.N.","L.P.N.",addCaseVariants=True) splitter.addExceptionTerms("Ms.", "D.O.", addCaseVariants=True) splitter.deleteExceptionTerms("A.B.", "B.S.", deleteCaseVariants=True) sentences = splitter.splitSentences(report) count = 0 for s in sentences: #print s markup = pyConText.ConTextMarkup() markup.toggleVerbose() markup.setRawText(s) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") #raw_input('marked targets and modifiers') #print "markup before pruning" #print markup.getXML() markup.pruneMarks() markup.dropMarks('Exclusion') # apply modifiers to any targets within the modifiers scope markup.applyModifiers() markup.pruneSelfModifyingRelationships() #context.pruneModifierRelationships() #context.dropInactiveModifiers() # add markup to context document print markup context.addMarkup(markup) count += 1 context.computeDocumentGraph()
def analyzeReport(self, report ): """ given an individual radiology report, creates a pyConTextGraph object that contains the context markup report: a text string containing the radiology reports """ context = self.context targets = self.targets modifiers = self.modifiers context.reset() splitter = helpers.sentenceSplitter() # alternatively you can skip the default exceptions and add your own # splitter = helpers.sentenceSpliter(useDefaults = False) #splitter.addExceptionTerms("Dr.","Mr.","Mrs.",M.D.","R.N.","L.P.N.",addCaseVariants=True) splitter.addExceptionTerms("Ms.","D.O.",addCaseVariants=True) splitter.deleteExceptionTerms("A.B.","B.S.",deleteCaseVariants=True) sentences = splitter.splitSentences(report) count = 0 for s in sentences: #print s context.setRawText(s) context.cleanText() context.markItems(modifiers, mode="modifier") context.markItems(targets, mode="target") g= context.getCurrentGraph() ic=0 context.pruneMarks() context.dropMarks('Exclusion') context.applyModifiers() #context.pruneModifierRelationships() #context.dropInactiveModifiers() print context self.outString += context.getXML()+u"\n" context.commit() count += 1 print context.getSectionText() raw_input('continue') context.computeDocumentGraph() ag = nx.to_pydot(context.getDocumentGraph(), strict=True) ag.write("case%03d.pdf"%self.currentCase,format="pdf")
def _splitSentencesSingleDocInternal(documentPath):
    """Takes a string, returns a list of reconstructed sentences of the form
    (text, docSpanTuple, docName, docLength, None) to be fed to
    PyConTextInput."""
    with open(documentPath, 'rU') as inFile:
        documentText = inFile.read()
    documentName = Document.ParseDocumentNameFromPath(documentPath)
    docLength = len(documentText)

    repeatManager = RepeatManger(documentText)
    reconstructor = Reconstructor(documentText)
    results = []
    for rawSentence in sentenceSplitter().splitSentences(documentText):
        # rebuild the sentence's original text, then locate its span in the
        # source document (repeat manager handles duplicate sentences)
        rebuilt = reconstructor.reconstructSentence(rawSentence)
        docSpan = repeatManager.determineSpan(rebuilt)
        results.append((rebuilt, docSpan, documentName, docLength, None))
    return results
def splitter():
    """Return a fresh pyConText sentence splitter."""
    return helpers.sentenceSplitter()
def test_sentenceSplitter1(self):
    """test whether we properly capture text that terminates without a
    recognized sentence termination"""
    result = helpers.sentenceSplitter().splitSentences(self.su3)
    assert len(result) == 3
def test_createSentenceSplitter():
    """the splitter factory should return a truthy object"""
    assert helpers.sentenceSplitter()
def test_sentenceSplitter2(self):
    """test whether we properly skip numbers with decimal points."""
    result = helpers.sentenceSplitter().splitSentences(self.su4)
    assert len(result) == 1
def analyzeReport( self, idName, report, modFilters=[ 'indication', 'pseudoneg', 'probable_negated_existence', 'definite_negated_existence', 'probable_existence', 'definite_existence', 'historical', 'carotid_critical', 'carotid_noncritical', 'right_sidedness', 'left_sidedness', 'bilateral_sidedness', 'sidedness', 'common_carotid_neurovascularanatomy', 'bulb_carotid_neurovascularanatomy', 'internal_carotid_neurovascularanatomy' ]): """given an individual radiology report, creates a pyConTextSql object that contains the context markup report: a text string containing the radiology reports mode: which of the pyConText objects are we using: disease modFilters: """ self.context = pyConText.ConTextDocument() targets = self.targets modifiers = self.modifiers if modFilters == None: modFilters = [ 'indication', 'pseudoneg', 'probable_negated_existence', 'definite_negated_existence', 'probable_existence', 'definite_existence', 'historical', 'carotid_critical', 'carotid_noncritical', 'right_sidedness', 'left_sidedness', 'bilateral_sidedness', 'sidedness', 'bulb_carotid_neurovascularanatomy', 'common_carotid_neurovascularanatomy', 'internal_carotid_neurovascularanatomy', ] splitter = helpers.sentenceSplitter() sentences = splitter.splitSentences(report) count = 0 print idName for s in sentences: markup = pyConText.ConTextMarkup() markup.setRawText(s) markup.cleanText() markup.markItems(modifiers, mode="modifier") markup.markItems(targets, mode="target") #markup.pruneMarks() #markup.dropMarks('Exclusion') markup.applyModifiers() #markup.pruneModifierRelationships() markup.dropInactiveModifiers() count += 1 self.context.addMarkup(markup) idName, sevFlag, htmlStr = html.mark_document_with_html( idName, self.context) #;raw_input() # fo=open(self.html_dir+"\\%s.html"%idName, "w") # fo.write(htmlStr) # fo.close() self.outString += self.context.getXML() + u"\n" print self.context.getXML() #;raw_input() return idName, sevFlag, htmlStr
from BoundaryReader import parseSentenceBoundaries from eHostessAddins.SentenceReconstructor import SentenceReconstructor from eHostessAddins.SentenceRepeatManager import SentenceRepeatManager from pyConTextNLP.helpers import sentenceSplitter testDoc = '327000.txt' inFile = open("./ClinicalNotes/Training1/" + testDoc, 'rU') body = inFile.read() inFile.close() testBoundaries, testText = parseSentenceBoundaries(body) reconstructor = SentenceReconstructor(testText) repeatManager = SentenceRepeatManager() sentences = sentenceSplitter().splitSentences(testText) predictedBoundaries = [] for sentence in sentences: reconstructedSentence = reconstructor.reconstructSentence(sentence) docSpan = repeatManager.processSentence(reconstructedSentence, testText) predictedBoundaries.append(docSpan[1]) numTrueBoundaries = len(testBoundaries) numPredictedBoundaries = len(predictedBoundaries) numPredictedCorrect = 0 for boundary in predictedBoundaries: if boundary in testBoundaries: