Example #1
    def analyzeSentence(sentence, targets=targets, modifiers=modifiers):
        sentence = sentenceNLP.preprocess(sentence, "suicide")
        print(sentence)
        counter = 0
        counter += 1
        # print "sentence no: "+str(counter)+" - "+sentence
        context = pyConText.ConTextDocument()
        markup = pyConText.ConTextMarkup()
        markup.setRawText(sentence)
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")

        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        markup.applyModifiers()

        markup.dropInactiveModifiers()

        context.addMarkup(markup)
        g = context.getDocumentGraph()

        ma = g.getMarkedTargets()
        print(g)
        # if len(ma)==0:
        # 	print sentence
        for te in ma:
            print(te)
            # note: returns the value for the first marked target only
            return getNegationValue(g, te)

        return None
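Example #1 calls a getNegationValue helper that is not shown above. A minimal sketch of such a helper, assuming the pyConTextNLP tagObject API (getCategory(), getPhrase()) and that modifiers are predecessors of their targets in the document graph; the category names checked here are assumptions:

def getNegationValue(g, te):
    """Hypothetical helper: classify a marked target as negated or affirmed
    by inspecting the modifiers attached to it in the NetworkX digraph g."""
    for modifier in g.predecessors(te):
        # negation categories from the standard pyConText lexicon (assumed)
        if 'definite_negated_existence' in modifier.getCategory() \
           or 'probable_negated_existence' in modifier.getCategory():
            return 'negated', te.getPhrase()
    return 'affirmed', te.getPhrase()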
Example #2
    def process(self, doc_text):
        """PyContextNLP, return doc_class, context_doc, annotations, relations"""

        context_doc = pyConTextGraph.ConTextDocument()
        sentences = self.sentence_segmenter.segToSentenceSpans(doc_text)

        for sentence in sentences:

            sentence_text = doc_text[sentence.begin:sentence.end].lower()
            # Process every sentence by adding markup
            m = markup_sentence(sentence_text,
                                modifiers=self.modifiers,
                                targets=self.targets)
            context_doc.addMarkup(m)
            context_doc.getSectionMarkups()
            # print(m)
            # print(context_doc.getXML())

        # convert the graph markups into dataframes
        markups = get_document_markups(context_doc)
        annotations, relations, doc_txt = convertMarkups2DF(markups)
        # display(annotations)
        # display(relations)

        # apply inferences for document classification
        inferenced_types = self.feature_inferencer.process(
            annotations, relations)
        # print('After inferred from modifier values, we got these types:\n '+str(inferenced_types))
        doc_class = self.document_inferencer.process(inferenced_types)
        # print('\nDocument classification: '+ doc_class )

        return doc_class, context_doc, annotations, relations
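process() relies on a markup_sentence helper defined elsewhere. The pyConTextNLP tutorials commonly define it along the following lines; this is a sketch of that conventional helper, not necessarily the exact one used in this example:

def markup_sentence(s, modifiers, targets, prune_inactive=True):
    """Run the standard pyConTextNLP markup pipeline over one sentence."""
    markup = pyConTextGraph.ConTextMarkup()
    markup.setRawText(s)
    markup.cleanText()
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    markup.applyModifiers()
    markup.pruneSelfModifyingRelationships()
    if prune_inactive:
        markup.dropInactiveModifiers()
    return markup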
Example #3
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s''' % (
            options.id, options.report_text, options.table)

        self.conn = sqlite.connect(options.dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.instantiateFromCSVtoitemData(
            options.lexical_kb,
            literalColumn=0,
            categoryColumn=1,
            regexColumn=2,
            ruleColumn=3)
        self.targets = itemData.instantiateFromCSVtoitemData(options.domain_kb,
                                                             literalColumn=0,
                                                             categoryColumn=1,
                                                             regexColumn=2,
                                                             ruleColumn=3)
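The four column indices passed above imply a knowledge base with one entry per row: literal, category, regular expression, and rule direction. Illustrative rows only; the file name and entries below are hypothetical:

# lexical_kb.csv: literal, category, regex, rule
no evidence of,DEFINITE_NEGATED_EXISTENCE,,forward
is ruled out,DEFINITE_NEGATED_EXISTENCE,,backward
history of,HISTORICAL,,forward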
Example #4
    def analyzeReport(self, report):
        """
        given an individual radiology report, creates a pyConTextGraph
        object that contains the context markup
        report: a text string containing the radiology report
        """
        context = pyConText.ConTextDocument()
        targets = self.targets
        modifiers = self.modifiers
        splitter = helpers.sentenceSplitter()
        sentences = splitter.splitSentences(report)
        count = 0
        for s in sentences:
            #print s
            markup = pyConText.ConTextMarkup()
            markup.setRawText(s)
            markup.cleanText()
            markup.markItems(modifiers, mode="modifier")
            markup.markItems(targets, mode="target")
            markup.pruneMarks()
            markup.dropMarks('Exclusion')
            markup.applyModifiers()
            context.addMarkup(markup)

        context.computeDocumentGraph(verbose=True)
        return context
Example #5
def markup_context_document(report_text, modifiers, targets):
    context = pyConTextGraph.ConTextDocument()

    # Split up into sentences
    sentences = tokenize_sents(report_text)
    for sentence in sentences:
        m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
        context.addMarkup(m)

    return context
Example #6
def markup_context_document(report_text, modifiers, targets):
    context = pyConTextGraph.ConTextDocument()

    # we will use TextBlob for breaking up sentences
    sentences = [s.raw for s in TextBlob(report_text).sentences]
    for sentence in sentences:
        m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
        context.addMarkup(m)

    return context
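Either markup_context_document variant is driven the same way. A hypothetical invocation, assuming the modifiers and targets itemData collections are already loaded:

report = "No evidence of pneumothorax. Patient has a history of PE."
context = markup_context_document(report, modifiers, targets)
print(context.getXML())  # XML serialization of the document markup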
Example #7
    def analyzeReport(self, csv, eHOST, idName, report, modFilters=None):
        """given an individual radiology report, creates a pyConTextSql
        object that contains the context markup

        report: a text string containing the radiology report
        modFilters: list of modifier categories to report on (the default
            list is applied when None is passed)
        """

        self.context = pyConText.ConTextDocument()
        targets=self.targets
        modifiers = self.modifiers
        if modFilters is None:
           modFilters = ['indication','pseudoneg','probable_negated_existence',
                          'definite_negated_existence', 'probable_existence',
                          'definite_existence', 'future', 'historical', 'cigarette_units', 'frequency', 
                          'amount', 'current', 'past', 'cessation', "initiation","pack_year", ]

        
        with open(os.getcwd() + "/eHOST_FILES/corpus/%s" % idName, "w") as fo:
            fo.write(report.strip())
        
        splitter = helpers.sentenceSplitter()
        sentences = splitter.splitSentences(report)
        count = 0

        
        for s in sentences:
            markup = pyConText.ConTextMarkup()
            markup.setRawText(s)
            markup.cleanText()
            markup.markItems(modifiers, mode="modifier")
            markup.markItems(targets, mode="target")
            markup.pruneMarks()
            markup.applyModifiers()
            markup.dropInactiveModifiers()
            count += 1

            self.context.addMarkup(markup)

            
        idName, sevFlag, htmlStr = html.mark_document_with_html(csv, eHOST, idName, self.context)

        self.outString += self.context.getXML() + u"\n"
        print(self.context.getXML())
        return idName, sevFlag, htmlStr
Example #8
    def split_sentences(self, report, modifiers, targets):
        blob = TextBlob(report.lower())
        count = 0
        rslts = []
        for s in blob.sentences:
            m = self.markup_sentence(s.raw, modifiers, targets)
            rslts.append(m)

        context = pyConText.ConTextDocument()
        for r in rslts:
            context.addMarkup(r)

        return context
Example #9
    def analyzeSentence(sentence,
                        targets=targets,
                        modifiers=modifiers,
                        tagExperiencer=False):
        #sentence = sentenceNLP.preprocess(sentence, "suicide")
        counter = 0
        counter += 1
        # print "sentence no: "+str(counter)+" - "+sentence
        context = pyConText.ConTextDocument()
        markup = pyConText.ConTextMarkup()
        markup.setRawText(sentence)
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        print(markup.getConTextModeNodes('modifier'))

        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        markup.applyModifiers()

        markup.dropInactiveModifiers()
        markup.updateScopes()

        context.addMarkup(markup)
        g = context.getDocumentGraph()
        #print "graph: ",g
        ma = g.getMarkedTargets()
        # if len(ma)==0:
        # 	print sentence
        tst = []
        details = []
        found = {}
        for te in ma:
            #print ma
            tmp1, tmp2 = getNegationValue(g, te)
            if tagExperiencer:
                e1, e2 = getExperiencerValue(g, te)
                if e1 != 'Other':
                    #print e1
                    #print sentence
                    tst.append(tmp1)
                    details.append(tmp2)
                    found[tmp2] = Counter(tmp1)
            else:
                tst.append(tmp1)
                details.append(tmp2)
                found[tmp2] = Counter(tmp1)
            #print tmp1, tmp2
            #print e1, e2

        #print tst, details
        return tst, details
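Like getNegationValue, the getExperiencerValue helper is not shown. One plausible shape for it, mirroring the negation helper; the 'other_experiencer' category name is an assumption:

def getExperiencerValue(g, te):
    """Hypothetical helper: report who experiences the finding."""
    for modifier in g.predecessors(te):
        if 'other_experiencer' in modifier.getCategory():  # assumed category
            return 'Other', te.getPhrase()
    return 'Patient', te.getPhrase()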
Example #10
 def processReports(self):
     """For the selected reports (training or testing) in the database,
     process each report with peFinder
     """
     count = 0
     for r in self.reports[0:20]:
         self.document = pyConText.ConTextDocument()
         self.currentCase = r[0]
         self.currentText = r[1].lower()
         print "CurrentCase:", self.currentCase
         self.outString += u"""<case>\n<caseNumber> %s </caseNumber>\n""" % self.currentCase
         self.analyzeReport(self.currentText)
         self.outString += u"</case>\n"
         rslts = self.classifyDocumentTargets()
         print(rslts)
         input('continue')  # pause between reports (raw_input in Python 2)
     print("_" * 48)
Example #11
    def markup_context_document(self, report_text, modifiers, targets):
        context = pyConTextGraph.ConTextDocument()

        # fall back to TextBlob for sentence splitting when PyRuSH is unavailable
        if self.pyrush is None:
            from textblob import TextBlob
            sentences = [s.raw for s in TextBlob(report_text).sentences]
        else:
            sentences = [
                report_text[sentence.begin:sentence.end]
                for sentence in self.pyrush.segToSentenceSpans(report_text)
            ]
        for sentence in sentences:
            m = markup_sentence(sentence, modifiers=modifiers, targets=targets)
            context.addMarkup(m)
            context.getSectionMarkups()

        return context
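The self.pyrush attribute is created elsewhere. A minimal sketch of the setup this method assumes; the import path and rule-file location vary by PyRuSH version and are assumptions here:

from PyRuSH import RuSH

pyrush = RuSH("conf/rush_rules.tsv")  # hypothetical rules path
spans = pyrush.segToSentenceSpans("No PE. History of DVT.")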
Example #12
 def processReports(self):
     """For the selected reports (training or testing) in the database,
     process each report 
     """
     for r in self.reports:
         self.document = pyConText.ConTextDocument()
         self.currentCase = r[0]
         self.currentText = r[1].lower()
         print("CurrentCase:", self.currentCase)
         print(r[1].lower())
         self.analyzeReport(self.currentText)
         if self.debug:
             self.writeDebugInfo()
         rslts = self.classifyDocumentTargets()
         self.commitResults(rslts)
         self.conn.commit()
Example #13
    def analyzeReport(self, report):
        """
        given an individual radiology report, creates a pyConTextGraph
        object that contains the context markup
        report: a text string containing the radiology report
        """
        context = pyConText.ConTextDocument()
        targets = self.targets
        modifiers = self.modifiers
        splitter = helpers.sentenceSplitter()
        # alternatively you can skip the default exceptions and add your own
        #       splitter = helpers.sentenceSplitter(useDefaults=False)
        #splitter.addExceptionTerms("Dr.","Mr.","Mrs.","M.D.","R.N.","L.P.N.",addCaseVariants=True)
        splitter.addExceptionTerms("Ms.", "D.O.", addCaseVariants=True)
        splitter.deleteExceptionTerms("A.B.", "B.S.", deleteCaseVariants=True)
        sentences = splitter.splitSentences(report)
        count = 0
        for s in sentences:
            #print s
            markup = pyConText.ConTextMarkup()
            markup.setRawText(s)
            markup.cleanText()
            markup.markItems(modifiers, mode="modifier")
            markup.markItems(targets, mode="target")
            markup.pruneMarks()
            markup.dropMarks('Exclusion')
            markup.applyModifiers()
            print(markup)
            context.addMarkup(markup)

        self.outString += context.getXML()
        print(context.getSectionText())
        #input('continue')
        context.computeDocumentGraph(verbose=True)
        # note: nx.to_pydot moved to nx.nx_pydot.to_pydot in newer NetworkX releases
        ag = nx.to_pydot(context.getDocumentGraph(), strict=True)
        ag.write("case%03d.pdf" % self.currentCase, format="pdf")
        #print("*" * 42)
        #print(context.getXML(currentGraph=False))
        #print("*" * 42)
        input('continue')  # pause between cases (raw_input in Python 2)
Example #14
def annotate_sentence(targets, modifiers, doc):
    """Annotate a spaCy Document for lexical targets and lexical modifiers.

    pyConTextNLP uses NetworkX directional graphs to represent the markup;
    nodes in the graph will be the concepts that are identified in the sentence
    and edges in the graph will be the relationships between those concepts.

    Args:
        targets (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the targets extracted
            from the targets_file input.

        modifiers (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the modifiers extracted
            from the modifiers_file input.

        doc (spacy.tokens.doc.Doc): spaCy Document containing the radiology
            report.

    Returns:
        context (pyConTextNLP.pyConTextGraph.ConTextDocument): object containing
            sentence markups across the report understood as a digraph of the
            relationships between lexical targets and lexical modifiers.

    """
    # Create the pyConText instance for the report
    context = pyConText.ConTextDocument()

    # Split the report into individual sentences
    # (spaCy v3 removed Span.string; sent.text is the current attribute)
    sentences = [sent.text.strip() for sent in doc.sents]

    # For the report, markup sentences, with span and modifier pruning, and add markup to context
    for s in sentences:
        markup = markup_sentence(targets, modifiers, s.lower())
        context.addMarkup(markup)

    return context
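A hypothetical driver for annotate_sentence, assuming a small English spaCy model is installed and targets/modifiers are loaded itemData collections:

import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("No evidence of pulmonary embolism. Small left pleural effusion.")
context = annotate_sentence(targets, modifiers, doc)
print(context.getXML())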
Example #15
    def analyzeReport(self, idName, report, modFilters=None):
        """given an individual radiology report, creates a pyConTextSql
        object that contains the context markup

        report: a text string containing the radiology report
        modFilters: list of modifier categories to report on (the default
            list is applied when None is passed)
        """
        self.context = pyConText.ConTextDocument()
        targets = self.targets
        modifiers = self.modifiers
        if modFilters is None:
            modFilters = [
                'indication',
                'pseudoneg',
                'probable_negated_existence',
                'definite_negated_existence',
                'probable_existence',
                'definite_existence',
                'historical',
                'carotid_critical',
                'carotid_noncritical',
                'right_sidedness',
                'left_sidedness',
                'bilateral_sidedness',
                'sidedness',
                'bulb_carotid_neurovascularanatomy',
                'common_carotid_neurovascularanatomy',
                'internal_carotid_neurovascularanatomy',
            ]

        splitter = helpers.sentenceSplitter()
        sentences = splitter.splitSentences(report)
        count = 0
        print(idName)

        for s in sentences:
            markup = pyConText.ConTextMarkup()
            markup.setRawText(s)
            markup.cleanText()

            markup.markItems(modifiers, mode="modifier")
            markup.markItems(targets, mode="target")

            #markup.pruneMarks()
            #markup.dropMarks('Exclusion')
            markup.applyModifiers()
            #markup.pruneModifierRelationships()
            markup.dropInactiveModifiers()
            count += 1

            self.context.addMarkup(markup)
        idName, sevFlag, htmlStr = html.mark_document_with_html(
            idName, self.context)
        #         fo=open(self.html_dir+"\\%s.html"%idName, "w")
        #         fo.write(htmlStr)
        #         fo.close()

        self.outString += self.context.getXML() + u"\n"

        print(self.context.getXML())

        return idName, sevFlag, htmlStr
Example #16
def create_context_doc(list_of_markups, modifiers=modifiers, targets=targets):
    """Creates a ConText document out of a list of markups."""
    context_doc = pyConText.ConTextDocument()
    for m in list_of_markups:
        context_doc.addMarkup(m)
    return context_doc
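A hypothetical usage of create_context_doc, paired with the per-sentence markup_sentence helper sketched earlier:

sentences = ["no evidence of pneumothorax.", "history of pe."]
markups = [markup_sentence(s, modifiers=modifiers, targets=targets)
           for s in sentences]
context_doc = create_context_doc(markups)
print(context_doc.getXML())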
Example #17
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s''' % (options.id, options.report_text, options.table)

        t = time.localtime()

        self.save_dir = options.save_dir  # +"-%s-%s-%s" % (t[0], t[1], t[2])

        count = 1
        if not os.path.exists(self.save_dir):
            os.mkdir(self.save_dir)

        self.html_dir = self.save_dir + "/html/"
        if not os.path.exists(self.html_dir):
            os.mkdir(self.html_dir)

        print(options.dbname)
        self.doGraphs = options.doGraphs
        self.allow_uncertainty = options.allow_uncertainty
        self.proc_category = options.category
        self.conn = sqlite.connect(options.dbname + ".db")
        print(options.dbname + ".db")
        self.cursor = self.conn.cursor()
        print(self.query1)
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print("number of reports to process", len(self.reports))
        #input('continue')
 

        tmp = os.path.splitext(options.odbname)
        outfile = tmp[0] + self.proc_category + "_%s.db" % (self.allow_uncertainty)
        rsltsDB = os.path.join(self.save_dir, outfile)
        if os.path.exists(rsltsDB):
            os.remove(rsltsDB)

        #old database output by DM
        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()

        self.resultsCursor.execute("""CREATE TABLE alerts (
            reportid TEXT,
            smokingStatus TEXT,
            report TEXT)""")
        


        # Create the itemData object to store the modifiers for the analysis;
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder

        #DM - addition
        self.context = pyConText.ConTextDocument()
        mods = itemData.instantiateFromCSV(options.lexical_kb)
        trgs = itemData.instantiateFromCSV(options.Hx_kb)

        self.modifiers = itemData.itemData()
        for mod in mods.keys():
            self.modifiers.prepend(mods[mod])

        self.targets = itemData.itemData()
        for trg in trgs.keys():
            self.targets.prepend(trgs[trg])
Example #18
# the start of this sample document is truncated in the source; the assignment
# below is assumed (note the variable shadows the builtin input, as used later
# in this snippet)
input = '''
No vomiting, chest pain, shortness of breath, nausea, dizziness, or chills on arrival.
On operative day three, the patient fever was detected with temperature 101.5 F.
After 3 days no fever was detected.
Patient came back for a follow up, denies fever.
'''

sentences = sentence_segmenter.segToSentenceSpans(input)

# See what the document was split into
for sentence in sentences:
    print("Sentence({}-{}):\t{}".format(sentence.begin, sentence.end,
                                        input[sentence.begin:sentence.end]))
    print('\n' + '-' * 100 + '\n')

# initiate a pyConTextGraph to hold the pyConText output
context_doc = pyConTextGraph.ConTextDocument()

for sentence in sentences:
    sentence_text = input[sentence.begin:sentence.end].lower()
    # Process every sentence by adding markup
    m = markup_sentence(sentence_text, modifiers=modifiers, targets=targets)
    context_doc.addMarkup(m)
    context_doc.getSectionMarkups()
    print(m)

# convert the graph markups into dataframes
markups = get_document_markups(context_doc)
annotations, relations, doc_txt = convertMarkups2DF(markups)

# inspect the first rows of the annotations DataFrame (pandas API assumed)
annotations.head()
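The other two return values can be inspected the same way, assuming convertMarkups2DF returns pandas DataFrames:

relations.head()  # modifier-to-target links
print(doc_txt)    # the text the annotation offsets refer to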
Example #19
        self.table = table
        self.result_label = result_label
        self.query1 = '''SELECT %s,%s FROM %s''' % (self.rid, self.column,
                                                    self.table)
        print(self.query1)
        self.mode = mode
        self.dbname = dbname
        self.getDBConnection(self.dbname)

        # get reports to process
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        # Create the pyConTextNLP ConTextDocument. This is the container for all the markups
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.itemData()
        self.targets = itemData.itemData()
        for kb in lexical_kb:
            self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
        for kb in domain_kb:
            self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))

        self.debug = debug
        if self.debug:
            print("debug set to True")
            tmp = os.path.splitext(self.dbname)
            self.debugDir = tmp[0] + "_debug_dir"
            if not os.path.exists(self.debugDir):
                os.mkdir(self.debugDir)