Пример #1
0
Файл: test2.py Проект: DBMI/NLP
def _normalize_target(target):
    """Return ``(tokens, literal)`` for a raw target phrase.

    tokens: the non-empty pieces of ``target`` after splitting on single
        spaces.
    literal: the tokens re-joined with single spaces, lower-cased, with every
        character found in ``string.punctuation`` removed.
    """
    tokens = [tok for tok in target.split(" ") if tok]
    lowered = " ".join(tokens).lower()
    literal = "".join(ch for ch in lowered if ch not in string.punctuation)
    return tokens, literal


def _mark_sentence(engine, sentence, modifiers, targets):
    """Run the mark/prune/apply/commit sequence of ``engine`` on one sentence.

    engine: a module exposing a ``pyConText`` class (``pyConText`` or
        ``pyConText_original`` in this script).
    Returns the context object after ``commit()`` has been called on it.
    """
    context = engine.pyConText(sentence)
    context.setTxt(sentence)
    context.markItems(modifiers, mode="modifier")
    context.markItems(targets, mode="target")
    context.pruneMarks()
    context.dropMarks('Exclusion')
    context.applyModifiers()
    context.dropInactiveModifiers()
    context.commit()
    return context


def main():
    """Compare ``pyConText`` with ``pyConText_original`` sentence by sentence.

    Reads ``rsAnnotations-1-120-random.txt`` from the current working
    directory.  The file is tab-separated; its first line is skipped, and for
    every following line column 1 is taken as the target phrase and column 2
    as the sentence.  Each sentence is processed by both engines and the
    result of ``classifyDocumentTargets`` for the sentence's own target is
    compared.  Prints the number of target items built, then, for every
    sentence whose two classifications differ, the sentence index, the
    sentence and both classifications.

    Columns 3-5 (negation, temporality and experiencer status) were only
    consumed by disabled evaluation code, so they are no longer parsed.
    """
    # os.path.join keeps the script usable outside Windows; the original
    # concatenated a literal backslash.
    fileName = os.path.join(os.getcwd(), "rsAnnotations-1-120-random.txt")
    with open(fileName, 'r') as f:
        t = f.readlines()

    sentences = []
    targets = []
    for line in t[1:]:  # the first line is not data
        if not line.strip():
            # A blank line carries no annotation; skip it instead of failing
            # with IndexError on the column lookups below.
            continue
        contents = line.split("\t")
        targets.append(contents[1])
        sentences.append(contents[2])

    # Build the modifier list.
    modifiers = itemData()
    for group in (pseudoNegations, definiteNegations, probableNegations,
                  probables, definites, indications, conjugates,
                  historicals, hypothetical, pseudoexperiencers,
                  experiencers):
        modifiers.prepend(group)

    # Build, in one pass, the per-sentence condition keys and the target
    # items grouped by number of tokens in the target phrase.
    conditions = []
    targetDict = {}
    for target in targets:
        tokens, sLiteral = _normalize_target(target)
        conditions.append(sLiteral)
        sCategory = sLiteral.upper().replace(" ", "_")
        item = contextItem([sLiteral, sCategory, sLiteral, ""])
        condLen = len(tokens)
        if condLen not in targetDict:
            targetDict[condLen] = itemData()
        targetDict[condLen].append(item)

    # Sorted keys make the prepend order deterministic instead of depending
    # on dict iteration order.
    # NOTE(review): prepending in ascending token count presumably leaves the
    # longest phrases first -- confirm against itemData.prepend.
    targetItems = itemData()
    for key in sorted(targetDict):
        targetItems.prepend(targetDict[key])
    print(len(targetItems))

    for idx, (s, condition) in enumerate(zip(sentences, conditions)):
        context = _mark_sentence(pyConText, s, modifiers, targetItems)
        context_original = _mark_sentence(
            pyConText_original, s, modifiers, targetItems)

        rec = classifyDocumentTargets(context, False)
        rec_original = classifyDocumentTargets(context_original, False)

        # Only compare when both engines produced a dict containing this
        # sentence's condition.
        if not (isinstance(rec, dict) and isinstance(rec_original, dict)):
            continue
        if condition not in rec or condition not in rec_original:
            continue
        if rec[condition] != rec_original[condition]:
            print("%s : %s" % (idx, s))
            print(rec[condition])
            print(rec_original[condition])
Пример #2
0
    def __init__(self, dbname, outfile):
        """Create a PEContext object associated with the SQLite database.

        dbname: name of the SQLite database to read; the ``id`` and
            ``impression`` columns of every row of its ``pesubject`` table
            are loaded into ``self.reports``.
        outfile: path of the SQLite results database.  If a file already
            exists at this path it is deleted, then a new database with an
            empty ``results`` table is created there.
        """

        # Query selecting the reports we will process.
        self.query1 = '''SELECT id,impression FROM pesubject'''

        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        # Single-argument form prints the same text under Python 2 and 3.
        print("number of reports to process %d" % len(self.reports))

        # Create context objects for each of the questions we want to answer.
        self.context = {
            "disease": pyConText.pyConText(),
            "quality": pyConText.pyConText(),
            "quality2": pyConText.pyConText()
        }

        # Always start from a fresh results database.
        if os.path.exists(outfile):
            os.remove(outfile)

        self.resultsConn = sqlite.connect(outfile)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")

        # Create the itemData objects that store the modifiers for the
        # analysis: start with definitions defined in pyConText and then add
        # definitions specific to peFinder.  Groups are added in the order
        # listed.
        disease_modifiers = itemData.itemData()
        for group in (pseudoNegations, definiteNegations, probableNegations,
                      probables, definites, indications, historicals,
                      conjugates):
            disease_modifiers.extend(group)

        # A separate itemData for the quality modifiers.
        quality_modifiers = itemData.itemData()
        for group in (pseudoNegations, definiteNegations, probableNegations,
                      probables, historicals, conjugates, qualities):
            quality_modifiers.extend(group)
        quality_modifiers.extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        self.modifiers = {
            "disease": disease_modifiers,
            "quality": quality_modifiers,
        }

        # Quality targets.
        quality_targets = itemData.itemData()
        quality_targets.extend(examFeatures)
        quality_targets.extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        quality2_targets = itemData.itemData()
        quality2_targets.extend(artifacts)

        self.targets = {
            "disease": peItems,
            "quality": quality_targets,
            "quality2": quality2_targets,
        }

        self.temporalCount = 0
        self.models = {}
Пример #3
0
    def __init__(self, dbname, outfile):
        """Create a PEContext object associated with the SQLite database.

        dbname: name of the SQLite database; every ``id``/``impression`` row
            of its ``pesubject`` table is fetched into ``self.reports``.
        outfile: path where the results database is written.  Any existing
            file at that path is removed first, and a ``results`` table is
            created in the new database.
        """

        # Define the query that selects the reports we will process.
        self.query1 = '''SELECT id,impression FROM pesubject'''

        self.conn = sqlite.connect(dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        # Formatted into one string so the output is identical whether print
        # is a statement (Python 2) or a function (Python 3).
        print("number of reports to process %d" % len(self.reports))

        # Create context objects for each of the questions we want to answer.
        self.context = {"disease": pyConText.pyConText(),
                        "quality": pyConText.pyConText(),
                        "quality2": pyConText.pyConText()}

        # Discard results left over from a previous run.
        if os.path.exists(outfile):
            os.remove(outfile)

        self.resultsConn = sqlite.connect(outfile)
        self.resultsCursor = self.resultsConn.cursor()
        self.resultsCursor.execute("""CREATE TABLE results (
            id INT PRIMARY KEY,
            disease TEXT,
            uncertainty TEXT,
            historical TEXT,
            quality TEXT)""")

        # Create the itemData object to store the modifiers for the analysis:
        # starts with definitions defined in pyConText and then adds
        # definitions specific to peFinder.
        self.modifiers = {"disease": itemData.itemData()}
        disease_groups = [pseudoNegations, definiteNegations,
                          probableNegations, probables, definites,
                          indications, historicals, conjugates]
        for group in disease_groups:
            self.modifiers["disease"].extend(group)

        # Create a separate itemData for the quality modifiers.
        self.modifiers["quality"] = itemData.itemData()
        quality_groups = [pseudoNegations, definiteNegations,
                          probableNegations, probables, historicals,
                          conjugates, qualities]
        for group in quality_groups:
            self.modifiers["quality"].extend(group)
        self.modifiers["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        # Quality targets.
        self.targets = {"disease": peItems}
        self.targets["quality"] = itemData.itemData()
        self.targets["quality"].extend(examFeatures)
        self.targets["quality"].extend(
            [['limited dataset compliant', 'EXCLUSION', '', '']])

        self.targets["quality2"] = itemData.itemData()
        self.targets["quality2"].extend(artifacts)

        self.temporalCount = 0
        self.models = {}