def analyzeSentence(sentence, targets=targets, modifiers=modifiers):
    sentence = sentenceNLP.preprocess(sentence, "suicide")
    print sentence
    counter = 0
    counter += 1
    # print "sentence no: "+str(counter)+" - "+sentence
    context = pyConText.ConTextDocument()
    markup = pyConText.ConTextMarkup()
    markup.setRawText(sentence)
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    markup.applyModifiers()
    markup.dropInactiveModifiers()
    context.addMarkup(markup)
    g = context.getDocumentGraph()
    ma = g.getMarkedTargets()
    print g
    # if len(ma)==0:
    #     print sentence
    for te in ma:
        print te
        return getNegationValue(g, te)
    return None
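# getNegationValue is referenced above (and again further down) but not defined in
# these snippets. The following is a minimal sketch, not the original implementation:
# it assumes the document graph links modifier nodes to their targets as predecessors,
# and that marked nodes expose getCategory()/getPhrase() as in pyConTextNLP tagObjects.
def getNegationValue(g, te):
    """Return a (label, phrase) pair for a marked target based on its modifiers."""
    categories = []
    for mod in g.predecessors(te):  # modifier nodes attached to this target
        categories.extend(mod.getCategory())
    if any('negated_existence' in c for c in categories):
        return 'negated', te.getPhrase()
    return 'affirmed', te.getPhrase()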
def process(self, doc_text):
    """pyConTextNLP processing; return doc_class, context_doc, annotations, relations"""
    context_doc = pyConTextGraph.ConTextDocument()
    sentences = self.sentence_segmenter.segToSentenceSpans(doc_text)
    for sentence in sentences:
        sentence_text = doc_text[sentence.begin:sentence.end].lower()
        # Process every sentence by adding markup
        m = markup_sentence(sentence_text, modifiers=self.modifiers, targets=self.targets)
        context_doc.addMarkup(m)
        context_doc.getSectionMarkups()
        # print(m)
    # print(context_doc.getXML())

    # convert graph markups into dataframes
    markups = get_document_markups(context_doc)
    annotations, relations, doc_txt = convertMarkups2DF(markups)
    # display(annotations)
    # display(relations)

    # apply inferences for document classification
    inferenced_types = self.feature_inferencer.process(annotations, relations)
    # print('After inferred from modifier values, we got these types:\n ' + str(inferenced_types))
    doc_class = self.document_inferencer.process(inferenced_types)
    # print('\nDocument classification: ' + doc_class)
    return doc_class, context_doc, annotations, relations
def __init__(self, options):
    """create an instance of a criticalFinder object associated with the SQLite database.

    dbname: name of SQLite database
    """
    # Define queries to select data from the SQLite database
    # this gets the reports we will process
    self.query1 = '''SELECT %s,%s FROM %s''' % (
        options.id, options.report_text, options.table)

    self.conn = sqlite.connect(options.dbname)
    self.cursor = self.conn.cursor()
    self.cursor.execute(self.query1)
    self.reports = self.cursor.fetchall()
    print "number of reports to process", len(self.reports)

    self.document = pyConText.ConTextDocument()
    self.modifiers = itemData.instantiateFromCSVtoitemData(
        options.lexical_kb,
        literalColumn=0, categoryColumn=1, regexColumn=2, ruleColumn=3)
    self.targets = itemData.instantiateFromCSVtoitemData(
        options.domain_kb,
        literalColumn=0, categoryColumn=1, regexColumn=2, ruleColumn=3)
def analyzeReport(self, report):
    """given an individual radiology report, creates a pyConTextGraph
    object that contains the context markup

    report: a text string containing the radiology reports
    """
    context = pyConText.ConTextDocument()
    targets = self.targets
    modifiers = self.modifiers
    splitter = helpers.sentenceSplitter()
    sentences = splitter.splitSentences(report)
    count = 0
    for s in sentences:
        # print s
        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        markup.applyModifiers()
        context.addMarkup(markup)
    context.computeDocumentGraph(verbose=True)
    return context
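# Hypothetical follow-up on the ConTextDocument returned by analyzeReport, reusing the
# graph-query calls that appear in other examples in this collection; 'finder', the
# report text, and the getPhrase()/getCategory() accessors are assumptions, not part
# of the snippet above.
context = finder.analyzeReport("No evidence of pulmonary embolism.")
g = context.getDocumentGraph()
for target in g.getMarkedTargets():
    print target.getPhrase(), target.getCategory()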
def markup_context_document(report_text, modifiers, targets):
    context = pyConTextGraph.ConTextDocument()

    # Split up into sentences
    sentences = tokenize_sents(report_text)
    for sentence in sentences:
        m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
        context.addMarkup(m)

    return context
def markup_context_document(report_text, modifiers, targets):
    context = pyConTextGraph.ConTextDocument()

    # we will use TextBlob for breaking up sentences
    sentences = [s.raw for s in TextBlob(report_text).sentences]
    for sentence in sentences:
        m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
        context.addMarkup(m)

    return context
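# markup_sentence is called throughout these examples but never shown. Below is a
# minimal sketch assembled from the per-sentence pipeline used elsewhere in this
# collection (setRawText / cleanText / markItems / pruneMarks / dropMarks /
# applyModifiers / dropInactiveModifiers); treat it as an assumed helper, not the
# canonical implementation.
def markup_sentence(s, modifiers, targets, prune_inactive=True):
    markup = pyConTextGraph.ConTextMarkup()
    markup.setRawText(s)
    markup.cleanText()
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    markup.applyModifiers()
    if prune_inactive:
        markup.dropInactiveModifiers()
    return markup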
def analyzeReport(self, csv, eHOST, idName, report,
                  modFilters=['indication', 'pseudoneg', 'probable_negated_existence',
                              'definite_negated_existence', 'probable_existence',
                              'definite_existence', 'future', 'historical',
                              'cigarette_units', 'frequency', 'amount', 'current',
                              'past', 'cessation', "initiation", "pack_year"]):
    """given an individual radiology report, creates a pyConTextSql
    object that contains the context markup

    report: a text string containing the radiology reports
    mode: which of the pyConText objects are we using: disease
    modFilters:
    """
    self.context = pyConText.ConTextDocument()
    targets = self.targets
    modifiers = self.modifiers
    if modFilters == None:
        modFilters = ['indication', 'pseudoneg', 'probable_negated_existence',
                      'definite_negated_existence', 'probable_existence',
                      'definite_existence', 'future', 'historical',
                      'cigarette_units', 'frequency', 'amount', 'current',
                      'past', 'cessation', "initiation", "pack_year"]

    fo = open(os.getcwd() + "/eHOST_FILES/corpus/%s" % idName, "w")
    fo.write(report.strip())
    fo.close()

    splitter = helpers.sentenceSplitter()
    sentences = splitter.splitSentences(report)
    count = 0
    for s in sentences:
        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        markup.pruneMarks()
        markup.applyModifiers()
        markup.dropInactiveModifiers()
        count += 1
        self.context.addMarkup(markup)

    idName, sevFlag, htmlStr = html.mark_document_with_html(csv, eHOST, idName, self.context)
    self.outString += self.context.getXML() + u"\n"
    print self.context.getXML()  # ;raw_input()
    return idName, sevFlag, htmlStr
def split_sentences(self, report, modifiers, targets):
    blob = TextBlob(report.lower())
    count = 0
    rslts = []
    for s in blob.sentences:
        m = self.markup_sentence(s.raw, modifiers, targets)
        rslts.append(m)

    context = pyConText.ConTextDocument()
    for r in rslts:
        context.addMarkup(r)
    return context
def analyzeSentence(sentence, targets=targets, modifiers=modifiers, tagExperiencer=False):
    # sentence = sentenceNLP.preprocess(sentence, "suicide")
    counter = 0
    counter += 1
    # print "sentence no: "+str(counter)+" - "+sentence
    context = pyConText.ConTextDocument()
    markup = pyConText.ConTextMarkup()
    markup.setRawText(sentence)
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    print markup.getConTextModeNodes('modifier')
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    markup.applyModifiers()
    markup.dropInactiveModifiers()
    markup.updateScopes()
    context.addMarkup(markup)
    g = context.getDocumentGraph()
    # print "graph: ", g
    ma = g.getMarkedTargets()
    # if len(ma)==0:
    #     print sentence
    tst = []
    details = []
    found = {}
    for te in ma:
        # print ma
        tmp1, tmp2 = getNegationValue(g, te)
        if tagExperiencer:
            e1, e2 = getExperiencerValue(g, te)
            if e1 != 'Other':
                # print e1
                # print sentence
                tst.append(tmp1)
                details.append(tmp2)
                found[tmp2] = Counter(tmp1)
        else:
            tst.append(tmp1)
            details.append(tmp2)
            found[tmp2] = Counter(tmp1)
        # print tmp1, tmp2
        # print e1, e2
    # print tst, details
    return tst, details
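# Hypothetical call, assuming the module-level targets/modifiers itemData objects are
# already loaded; the sentence text is made up for illustration. Each returned list
# holds one entry per marked target found in the sentence.
labels, details = analyzeSentence("Patient denies suicidal ideation.", tagExperiencer=True)
print labels, details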
def processReports(self):
    """For the selected reports (training or testing) in the database,
    process each report with peFinder
    """
    count = 0
    for r in self.reports[0:20]:
        self.document = pyConText.ConTextDocument()
        self.currentCase = r[0]
        self.currentText = r[1].lower()
        print "CurrentCase:", self.currentCase
        self.outString += u"""<case>\n<caseNumber> %s </caseNumber>\n""" % self.currentCase
        self.analyzeReport(self.currentText)
        self.outString += u"</case>\n"
        rslts = self.classifyDocumentTargets()
        print rslts
        raw_input('continue')
        print "_" * 48
def markup_context_document(self, report_text, modifiers, targets):
    context = pyConTextGraph.ConTextDocument()

    # use PyRuSH for sentence segmentation if available, otherwise fall back to TextBlob
    if self.pyrush is None:
        from textblob import TextBlob
        sentences = [s.raw for s in TextBlob(report_text).sentences]
    else:
        sentences = [
            report_text[sentence.begin:sentence.end]
            for sentence in self.pyrush.segToSentenceSpans(report_text)
        ]
    for sentence in sentences:
        m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
        context.addMarkup(m)
    context.getSectionMarkups()
    return context
def processReports(self):
    """For the selected reports (training or testing) in the database,
    process each report
    """
    for r in self.reports:
        # if r[0] in [77,2619,3030,3330]:
        self.document = pyConText.ConTextDocument()
        # try:
        if (True):
            self.currentCase = r[0]
            self.currentText = r[1].lower()
            print "CurrentCase:", self.currentCase
            print r[1].lower()
            self.analyzeReport(self.currentText)
            if (self.debug):
                self.writeDebugInfo()
            rslts = self.classifyDocumentTargets()
            self.commitResults(rslts)
            self.conn.commit()
def analyzeReport(self, report):
    """given an individual radiology report, creates a pyConTextGraph
    object that contains the context markup

    report: a text string containing the radiology reports
    """
    context = pyConText.ConTextDocument()
    targets = self.targets
    modifiers = self.modifiers
    splitter = helpers.sentenceSplitter()
    # alternatively you can skip the default exceptions and add your own
    # splitter = helpers.sentenceSplitter(useDefaults=False)
    # splitter.addExceptionTerms("Dr.","Mr.","Mrs.","M.D.","R.N.","L.P.N.",addCaseVariants=True)
    splitter.addExceptionTerms("Ms.", "D.O.", addCaseVariants=True)
    splitter.deleteExceptionTerms("A.B.", "B.S.", deleteCaseVariants=True)
    sentences = splitter.splitSentences(report)
    count = 0
    for s in sentences:
        # print s
        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        markup.applyModifiers()
        print markup
        context.addMarkup(markup)

    self.outString += context.getXML()
    print context.getSectionText()
    # raw_input('continue')
    context.computeDocumentGraph(verbose=True)
    ag = nx.to_pydot(context.getDocumentGraph(), strict=True)
    ag.write("case%03d.pdf" % self.currentCase, format="pdf")
    # print "*"*42
    # print context.getXML(currentGraph=False)
    # print "*"*42
    raw_input('continue')
def annotate_sentence(targets, modifiers, doc):
    """Annotate a spaCy Document for lexical targets and lexical modifiers.

    pyConTextNLP uses NetworkX directional graphs to represent the markup;
    nodes in the graph will be the concepts that are identified in the
    sentence and edges in the graph will be the relationships between
    those concepts.

    Args:
        targets (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the targets extracted
            from the targets_file input.
        modifiers (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the modifiers extracted
            from the modifiers_file input.
        doc (spacy.tokens.doc.Doc): spaCy Document containing the radiology report.

    Returns:
        context (pyConTextNLP.pyConTextGraph.ConTextDocument): object containing
            sentence markups across the report understood as a digraph of the
            relationships between lexical targets and lexical modifiers.
    """
    # Create the pyConText instance for the report
    context = pyConText.ConTextDocument()

    # Split the report into individual sentences
    sentences = [sent.string.strip() for sent in doc.sents]

    # For the report, markup sentences, with span and modifier pruning,
    # and add markup to context
    for s in sentences:
        markup = markup_sentence(targets, modifiers, s.lower())
        context.addMarkup(markup)

    return context
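# Hypothetical usage of annotate_sentence, assuming a spaCy model is installed and the
# targets/modifiers itemData objects have already been loaded; the model name and the
# report text are placeholders.
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("No evidence of pneumothorax. Small right pleural effusion is unchanged.")
context = annotate_sentence(targets, modifiers, doc)
print(context.getXML())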
def analyzeReport(self, idName, report,
                  modFilters=[
                      'indication', 'pseudoneg', 'probable_negated_existence',
                      'definite_negated_existence', 'probable_existence',
                      'definite_existence', 'historical', 'carotid_critical',
                      'carotid_noncritical', 'right_sidedness', 'left_sidedness',
                      'bilateral_sidedness', 'sidedness',
                      'common_carotid_neurovascularanatomy',
                      'bulb_carotid_neurovascularanatomy',
                      'internal_carotid_neurovascularanatomy'
                  ]):
    """given an individual radiology report, creates a pyConTextSql
    object that contains the context markup

    report: a text string containing the radiology reports
    mode: which of the pyConText objects are we using: disease
    modFilters:
    """
    self.context = pyConText.ConTextDocument()
    targets = self.targets
    modifiers = self.modifiers
    if modFilters == None:
        modFilters = [
            'indication', 'pseudoneg', 'probable_negated_existence',
            'definite_negated_existence', 'probable_existence',
            'definite_existence', 'historical', 'carotid_critical',
            'carotid_noncritical', 'right_sidedness', 'left_sidedness',
            'bilateral_sidedness', 'sidedness',
            'bulb_carotid_neurovascularanatomy',
            'common_carotid_neurovascularanatomy',
            'internal_carotid_neurovascularanatomy',
        ]

    splitter = helpers.sentenceSplitter()
    sentences = splitter.splitSentences(report)
    count = 0
    print idName
    for s in sentences:
        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        # markup.pruneMarks()
        # markup.dropMarks('Exclusion')
        markup.applyModifiers()
        # markup.pruneModifierRelationships()
        markup.dropInactiveModifiers()
        count += 1
        self.context.addMarkup(markup)

    idName, sevFlag, htmlStr = html.mark_document_with_html(idName, self.context)  # ;raw_input()
    # fo = open(self.html_dir + "\\%s.html" % idName, "w")
    # fo.write(htmlStr)
    # fo.close()
    self.outString += self.context.getXML() + u"\n"
    print self.context.getXML()  # ;raw_input()
    return idName, sevFlag, htmlStr
def create_context_doc(list_of_markups, modifiers=modifiers, targets=targets):
    """Creates a ConText document out of a list of markups."""
    context_doc = pyConText.ConTextDocument()
    for m in list_of_markups:
        context_doc.addMarkup(m)
    return context_doc
def __init__(self, options):
    """create an instance of a criticalFinder object associated with the SQLite database.

    dbname: name of SQLite database
    """
    # Define queries to select data from the SQLite database
    # this gets the reports we will process
    self.query1 = '''SELECT %s,%s FROM %s''' % (options.id, options.report_text, options.table)

    t = time.localtime()
    self.save_dir = options.save_dir  # +"-%s-%s-%s"%(t[0],t[1],t[2])
    count = 1
    if (not os.path.exists(self.save_dir)):
        os.mkdir(self.save_dir)
    self.html_dir = self.save_dir + "/html/"
    if (not os.path.exists(self.html_dir)):
        os.mkdir(self.html_dir)
    print options.dbname

    self.doGraphs = options.doGraphs
    self.allow_uncertainty = options.allow_uncertainty
    self.proc_category = options.category
    self.conn = sqlite.connect(options.dbname + ".db")
    print options.dbname + ".db"
    self.cursor = self.conn.cursor()
    print self.query1
    self.cursor.execute(self.query1)
    self.reports = self.cursor.fetchall()
    print "number of reports to process", len(self.reports)
    # raw_input('continue')

    tmp = os.path.splitext(options.odbname)
    outfile = tmp[0] + self.proc_category + "_%s.db" % (self.allow_uncertainty)
    rsltsDB = os.path.join(self.save_dir, outfile)
    if (os.path.exists(rsltsDB)):
        os.remove(rsltsDB)  # old database output by DM
    self.resultsConn = sqlite.connect(rsltsDB)
    self.resultsCursor = self.resultsConn.cursor()
    # self.resultsCursor.execute("""CREATE TABLE alerts ( reportid TEXT, smokingStatus TEXT, report TEXT)""")

    # Create the itemData object to store the modifiers for the analysis
    # starts with definitions defined in pyConText and then adds
    # definitions specific for peFinder
    # DM - addition
    self.context = pyConText.ConTextDocument()
    mods = itemData.instantiateFromCSV(options.lexical_kb)
    trgs = itemData.instantiateFromCSV(options.Hx_kb)

    self.modifiers = itemData.itemData()
    for mod in mods.keys():
        self.modifiers.prepend(mods[mod])

    self.targets = itemData.itemData()
    for trg in trgs.keys():
        self.targets.prepend(trgs[trg])
No vomiting, chest pain, shortness of breath, nausea, dizziness, or chills on arrival.
On operative day three, the patient fever was detected with temperature 101.5 F.
After 3 days no fever was detected. Patient came back for a follow up, denies fever.
'''

sentences = sentence_segmenter.segToSentenceSpans(input)

# See what the document was split into
for sentence in sentences:
    print("Sentence({}-{}):\t{}".format(sentence.begin, sentence.end,
                                        input[sentence.begin:sentence.end]))
print('\n' + '-' * 100 + '\n')

# initiate a pyConTextGraph to hold the pyConText output
context_doc = pyConTextGraph.ConTextDocument()

for sentence in sentences:
    sentence_text = input[sentence.begin:sentence.end].lower()
    # Process every sentence by adding markup
    m = markup_sentence(sentence_text, modifiers=modifiers, targets=targets)
    context_doc.addMarkup(m)
    context_doc.getSectionMarkups()
    print(m)

# convert graph markups into dataframes
markups = get_document_markups(context_doc)
annotations, relations, doc_txt = convertMarkups2DF(markups)
head(annotations)
self.table = table
self.result_label = result_label
self.query1 = '''SELECT %s,%s FROM %s''' % (self.rid, self.column, self.table)
print self.query1
self.mode = mode
self.dbname = dbname
self.getDBConnection(self.dbname)

# get reports to process
self.cursor.execute(self.query1)
self.reports = self.cursor.fetchall()
print "number of reports to process", len(self.reports)

# Create the pyConTextNLP ConTextDocument. This is the container for all the markups
self.document = pyConText.ConTextDocument()

self.modifiers = itemData.itemData()
self.targets = itemData.itemData()
for kb in lexical_kb:
    self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
for kb in domain_kb:
    self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))

self.debug = debug
if (self.debug):
    print "debug set to True"
    tmp = os.path.splitext(self.dbname)
    self.debugDir = tmp[0] + "_debug_dir"
    if (not os.path.exists(self.debugDir)):
        os.mkdir(self.debugDir)