示例#1
0
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """
        

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s'''%(options.id,options.report_text,options.table)
        
        self.conn = sqlite.connect(options.dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()
        
        print "number of reports to process",len(self.reports)
        self.context = pyConText.pyConText()
       
        mods = itemData.instantiateFromCSV(options.lexical_kb)
        trgs = itemData.instantiateFromCSV(options.domain_kb)

        self.modifiers = itemData.itemData()
        for key in mods.keys():
            self.modifiers.prepend(mods[key])

        self.targets = itemData.itemData()
        for key in trgs.keys():
            self.targets.prepend(trgs[key])
    def markup_report(
            self,
            report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
                                    bowel obstruction or mass identified within the abdomen or pelvis. 
                                    Non-specific interstitial opacities and bronchiectasis seen at the right
                                    base, suggestive of post-inflammatory changes.
                                    ''',
            modifiers=None,
            targets=None):
        """Mark up ``report`` and return the result rendered as HTML.

        report: text to process; defaults to a built-in sample impression.
        modifiers: JSON text that is decoded with ``json.loads`` and loaded
            into a fresh ``itemData``; when None, ``self.mod`` is used.
        targets: JSON text handled the same way; when None, ``self.tar`` is
            used.

        Returns whatever ``html.mark_document_with_html`` produces for the
        output of ``self.split_sentences``, using the colors from
        ``self.get_colors_dict``.
        """
        # Debug output. len() and iteration only make sense when a value was
        # supplied; with the default None they used to raise TypeError before
        # the fallback to self.mod could ever be reached.
        print("type of modifiers", type(modifiers))
        if modifiers is not None:
            print("len of modifiers", len(modifiers))
            print(modifiers)
            for m in modifiers:
                print(m)

        if modifiers is None:
            _modifiers = self.mod
        else:
            _modifiers = itemData.itemData()
            _modifiers.extend(json.loads(modifiers))
        if targets is None:
            _targets = self.tar
        else:
            _targets = itemData.itemData()
            _targets.extend(json.loads(targets))

        context = self.split_sentences(report, _modifiers, _targets)
        clrs = self.get_colors_dict(_modifiers, _targets)
        return html.mark_document_with_html(context, colors=clrs)
    def markup_report(self, report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
                                    bowel obstruction or mass identified within the abdomen or pelvis. 
                                    Non-specific interstitial opacities and bronchiectasis seen at the right
                                    base, suggestive of post-inflammatory changes.
                                    ''',
                                    modifiers=None,
                                    targets=None):
        """Mark up ``report`` and return the result rendered as HTML.

        report: text to process; defaults to a built-in sample impression.
        modifiers: JSON text that is decoded with ``json.loads`` and loaded
            into a fresh ``itemData``; when None, ``self.mod`` is used.
        targets: JSON text handled the same way; when None, ``self.tar`` is
            used.

        Returns whatever ``html.mark_document_with_html`` produces for the
        output of ``self.split_sentences``, using the colors from
        ``self.get_colors_dict``.
        """
        # Debug output. len() and iteration only make sense when a value was
        # supplied; with the default None they used to raise TypeError before
        # the fallback to self.mod could ever be reached.
        print("type of modifiers", type(modifiers))
        if modifiers is not None:
            print("len of modifiers", len(modifiers))
            print(modifiers)
            for m in modifiers:
                print(m)

        if modifiers is None:
            _modifiers = self.mod
        else:
            _modifiers = itemData.itemData()
            _modifiers.extend(json.loads(modifiers))
        if targets is None:
            _targets = self.tar
        else:
            _targets = itemData.itemData()
            _targets.extend(json.loads(targets))

        context = self.split_sentences(report, _modifiers, _targets)
        clrs = self.get_colors_dict(_modifiers, _targets)
        return html.mark_document_with_html(context, colors=clrs)
示例#4
0
def get_target_phrases_item_data(target_phrases):
    """Wrap each custom target phrase in a contextItem and collect them.

    target_phrases: iterable of phrases; each phrase is used for all four
        fields handed to ``itemData.contextItem``.

    Returns the ``itemData`` holding one contextItem per phrase, in input
    order.
    """
    collected = itemData.itemData()
    for phrase in target_phrases:
        # the same phrase fills every one of the four contextItem fields
        collected.append(itemData.contextItem([phrase] * 4))
    return collected
示例#5
0
    def setUp(self):
        """Build the fixtures: a ConTextMarkup, a sentence splitter, four
        sample texts, and the raw item definitions plus an empty itemData."""
        # objects under test
        self.context = pyConText.ConTextMarkup()
        self.splitter = helpers.sentenceSplitter()

        # sample texts: su1 contains non-ASCII characters and inline markup
        self.su1 = u"kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?"
        # su2 has a numbered list marker ("1.") in the middle of the text
        self.su2 = u"IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM."
        # su3 has a sentence that ends with a digit followed by a period
        self.su3 = u"This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence."
        # su4 has a decimal number whose period is not a sentence boundary
        self.su4 = u"This is a sentence with a numeric value equal to 1.43 and should not be split into two parts."
        # each entry is a four-element list handed to itemData.contextItem
        # NOTE: the ur"" prefix below is Python 2-only syntax
        self.items = [
            [u"pulmonary embolism", u"PULMONARY_EMBOLISM", ur"""pulmonary\s(artery )?(embol[a-z]+)""", ""],
            ["no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward"],
        ]
        self.itemData = itemData.itemData()
        for i in self.items:
            # NOTE(review): this binds the contextItem callable itself and never
            # uses `i`; the statement looks truncated -- confirm against the
            # full source.
            cit = itemData.contextItem
示例#6
0
    def setUp(self):
        """Build the fixtures: a ConTextMarkup, a sentence splitter, four
        sample texts, and the raw item definitions plus an empty itemData."""
        # objects under test
        self.context = pyConText.ConTextMarkup()
        self.splitter = helpers.sentenceSplitter()

        # sample texts: su1 contains non-ASCII characters and inline markup
        self.su1 = u'kanso <Diagnosis>**diabetes**</Diagnosis> utesl\xf6t eller diabetes men inte s\xe4kert. Vi siktar p\xe5 en r\xf6ntgenkontroll. kan det vara nej panik\xe5ngesten\n?'
        # su2 has a numbered list marker ("1.") in the middle of the text
        self.su2 = u'IMPRESSION: 1. LIMITED STUDY DEMONSTRATING NO GROSS EVIDENCE OF SIGNIFICANT PULMONARY EMBOLISM.'
        # su3 has a sentence that ends with a digit followed by a period
        self.su3 = u'This is a sentence that does not end with a number. But this sentence ends with 1. So this should be recognized as a third sentence.'
        # su4 has a decimal number whose period is not a sentence boundary
        self.su4 = u'This is a sentence with a numeric value equal to 1.43 and should not be split into two parts.'
        # each entry is a four-element list handed to itemData.contextItem
        # NOTE: the ur"" prefix below is Python 2-only syntax
        self.items = [[
            u"pulmonary embolism", u"PULMONARY_EMBOLISM",
            ur"""pulmonary\s(artery )?(embol[a-z]+)""", ""
        ], [
            "no gross evidence of", "PROBABLE_NEGATED_EXISTENCE", "", "forward"
        ]]
        self.itemData = itemData.itemData()
        for i in self.items:
            # NOTE(review): this binds the contextItem callable itself and never
            # uses `i`; the statement looks truncated -- confirm against the
            # full source.
            cit = itemData.contextItem
示例#7
0
def convertCSVtoitemData(csvFile,
                         encoding='utf-8',
                         delimiter="\t",
                         headerRows=1,
                         literalColumn=0,
                         categoryColumn=1,
                         regexColumn=2,
                         ruleColumn=3):
    """
    Read a delimited file of itemData rules into a single itemData instance.

    csvFile: name of file to read items from
    encoding: text encoding used to decode the file; default = 'utf-8'
    delimiter: field separator handed to csv.reader; default = tab
    headerRows: number of leading rows to discard as header; default = 1
    literalColumn: column from which to read the literal; default = 0
    categoryColumn: column from which to read the category; default = 1
    regexColumn: column from which to read the regular expression: default = 2
    ruleColumn: column from which to read the rule; default = 3

    Returns an itemData holding one contextItem per non-blank data row.
    Raises IndexError when a non-blank row has fewer columns than the largest
    configured column index.
    """
    items = itemData.itemData()  # itemData to be returned to the user
    # newline='' lets the csv module do its own line-ending handling (the
    # 'rU' mode used previously is rejected by Python 3.11+), and the
    # with-block closes the file even when a row fails to parse.
    with open(csvFile, 'r', encoding=encoding, newline='') as f:
        reader = csv.reader(f, delimiter=delimiter)
        # discard the header rows; tolerate a file shorter than the header
        for _ in range(headerRows):
            next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to convert
                continue
            # The regex text is passed through unchanged: backslashes read
            # from a file are already literal, so no "raw string" conversion
            # is needed (the former r'''{0}'''.format(...) was a no-op).
            fields = [
                row[literalColumn], row[categoryColumn], row[regexColumn],
                row[ruleColumn]
            ]
            items.append(itemData.contextItem(fields))
    return items
示例#8
0
        self.result_label = result_label
        self.query1 = '''SELECT %s,%s FROM %s'''%(self.rid,self.column,self.table)
        print self.query1
        self.mode = mode
        self.dbname = dbname
        self.getDBConnection(self.dbname)

        # get reports to process
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process",len(self.reports)
        # Create the pyConTextNLP ConTextDocument. This is the container for all the markups
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.itemData()
        self.targets = itemData.itemData()
        for kb in lexical_kb:
            self.modifiers.extend( itemData.instantiateFromCSVtoitemData(kb) )
        for kb in domain_kb:
            self.targets.extend( itemData.instantiateFromCSVtoitemData(kb) )


        self.debug = debug
        if( self.debug ):
            print "debug set to True"
            tmp = os.path.splitext(self.dbname)
            self.debugDir = tmp[0]+"_debug_dir"
            if( not os.path.exists(self.debugDir) ):
                os.mkdir(self.debugDir)
        else:
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s'''%(options.id,options.report_text,options.table)
        

        t = time.localtime()

        self.save_dir = options.save_dir#+"-%s-%s-%s"%(t[0],t[1],t[2])

        count = 1
        if( not os.path.exists(self.save_dir) ):
            os.mkdir(self.save_dir)
        
        self.html_dir=self.save_dir+"/html/"
        if( not os.path.exists(self.html_dir) ):
            os.mkdir(self.html_dir)
            
            

        print options.dbname
        self.doGraphs = options.doGraphs
        self.allow_uncertainty = options.allow_uncertainty
        self.proc_category = options.category
        self.conn = sqlite.connect(options.dbname+".db")
        print options.dbname+".db"
        self.cursor = self.conn.cursor()
        print self.query1
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()
                
        
        print "number of reports to process",len(self.reports)
        #raw_input('continue')
 

        tmp = os.path.splitext(options.odbname)
        outfile = tmp[0]+self.proc_category+"_%s.db"%(self.allow_uncertainty)
        rsltsDB = os.path.join(self.save_dir,outfile)
        if( os.path.exists(rsltsDB) ):
            os.remove(rsltsDB)
            
        
        #old database output by DM
        self.resultsConn = sqlite.connect(rsltsDB)
        self.resultsCursor = self.resultsConn.cursor()

#         
        self.resultsCursor.execute("""CREATE TABLE alerts (
            reportid TEXT,
            smokingStatus TEXT,
            report TEXT)""")
        


        # Create the itemData object to store the modifiers for the  analysis
        # starts with definitions defined in pyConText and then adds
        # definitions specific for peFinder
        
        #DM - addition
        self.context=pyConText.ConTextDocument()
        mods=itemData.instantiateFromCSV(options.lexical_kb)
        trgs=itemData.instantiateFromCSV(options.Hx_kb)
        
        self.modifiers = itemData.itemData()
        for mod in mods.keys():
            self.modifiers.prepend(mods[mod])
  
        self.targets = itemData.itemData()
        for trg in trgs.keys():
            self.targets.prepend(trgs[trg])
示例#10
0
 def test_instantiate_itemData(self):
     """An itemData holding one contextItem built from the first fixture
     entry must be truthy."""
     collection = itemData.itemData()
     collection.append(itemData.contextItem(self.items[0]))
     assert collection
示例#11
0
 def test_instantiate_itemData(self):
     """An itemData holding one contextItem built from the first fixture
     entry must be truthy."""
     collection = itemData.itemData()
     collection.append(itemData.contextItem(self.items[0]))
     assert collection
示例#12
0
        self.query1 = '''SELECT %s,%s FROM %s''' % (self.rid, self.column,
                                                    self.table)
        print self.query1
        self.mode = mode
        self.dbname = dbname
        self.getDBConnection(self.dbname)

        # get reports to process
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        # Create the pyConTextNLP ConTextDocument. This is the container for all the markups
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.itemData()
        self.targets = itemData.itemData()
        for kb in lexical_kb:
            self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
        for kb in domain_kb:
            self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))

        self.debug = debug
        if (self.debug):
            print "debug set to True"
            tmp = os.path.splitext(self.dbname)
            self.debugDir = tmp[0] + "_debug_dir"
            if (not os.path.exists(self.debugDir)):
                os.mkdir(self.debugDir)
        else:
            self.debugDir = ''