Пример #1
0
    def computeStatsOld(self,
                        absList,
                        statOut=None,
                        errorOut=None,
                        typeList=[]):
        """ Compute RPF stats for associated mentions and quantities in a list
            of abstracts.

            Each association stored in self.associationList has its text
            event rate and its outcome number passed through
            self.checkQuantity. An association that collects no error
            messages is counted as a TP. Otherwise it is counted as an FP,
            and additionally as an FN when both quantities are true
            positives that should belong to the same outcome measurement.

            absList  -- abstracts to report on; those without an entry in
                        self.associationList only get a header line and
                        their incomplete matches processed
            statOut  -- receives self.statList through statOut.copy() and
                        the final stats through stats.saveStats(). It is
                        used unconditionally, so the None default is not
                        a usable value.
            errorOut -- object with a write() method that receives the
                        TP/FP/FN lines. Also used unconditionally.
            typeList -- not used by this method

            Returns None.
        """
        statOut.copy(self.statList)

        # how many of the associations are correct/incorrect?
        statDescription = '(G,O) - OM'
        stats = EntityStats([statDescription])

        # list() takes a snapshot of the keys before the loop body runs.
        for abstract in list(self.associationList):
            errorOut.write('---%s ---\n' % (abstract.id))

            for omAssociation in self.associationList[abstract]:
                group = omAssociation.group
                outcome = omAssociation.outcome
                measurement = omAssociation.outcomeMeasurement

                er = measurement.getTextEventRate()
                on = measurement.getOutcomeNumber()

                # checkQuantity appends its complaints to errorMsgs in place.
                errorMsgs = []
                self.checkQuantity(er, group, outcome, errorMsgs)
                self.checkQuantity(on, group, outcome, errorMsgs)

                if not errorMsgs:
                    stats.irstats[statDescription].incTP()
                    prefix = '+TP'
                else:
                    stats.irstats[statDescription].incFP()
                    prefix = '-FP'
                    if ((er is None or er.isTruePositive())
                            and (on is None or on.isTruePositive())
                            and (er is None or on is None
                                 or on.shouldBelongToSameOutcomeMeasurement(er))):
                        # this outcome measurement is correct and should be
                        # associated: it is both a false negative and a
                        # false positive
                        stats.irstats[statDescription].incFN()
                        errorOut.write('-FN: %s\n' %
                                       (omAssociation.getValueString()))

                errorOut.write('%s: %s\n' % (prefix, omAssociation.toString()))
                if errorMsgs:
                    errorOut.write(', '.join(errorMsgs) + '\n')

            # look at those OMs that were not associated
            if abstract in self.incompleteMatches:
                self.__processIncompleteMatches(abstract, errorOut, stats,
                                                statDescription)

        # Abstracts that never produced an association still get reported.
        for abstract in absList:
            # Test membership on the mapping itself rather than on a
            # freshly built key list.
            if abstract in self.associationList:
                continue
            errorOut.write('---%s ---\n' % (abstract.id))
            if abstract in self.incompleteMatches:
                self.__processIncompleteMatches(abstract, errorOut, stats,
                                                statDescription)

        stats.saveStats(statOut, keyPrefix='Assoc ')
Пример #2
0
    def computeStats(self, absList, statOut=None, errorOut=None, typeList=[], keyPrefix=''):
        """ compute RPF stats for associated mentions and quantities in a list
            of abstracts.

            write final RPF stats to statOut
            write TP/FP/FN to errorOut
        """
        if len(keyPrefix) == 0:
            keyPrefix = 'Assoc - '

        if len(typeList) > 0:
            statDescription = '-'.join(typeList)
        else:
            statDescription = self.entityTypesString

        stats = EntityStats([statDescription])

        totalFalsePairs = 0
        for abs in absList:
            errorOut.write('---%s (%s)\n'%(abs.id, statDescription))
            for s in abs.sentences:
                [tp, fp, fn, falsePairs] = self.checkAssociations(s, errorOut, typeList)
                errorOut.write('tp: %d, fp: %d, fn: %d, falsePairs: %d\n'%(tp, fp, fn, falsePairs))
                totalFalsePairs += falsePairs
                stats.irstats[statDescription].addTP(tp)
                stats.irstats[statDescription].addFP(fp)
                stats.irstats[statDescription].addFN(fn)
        stats.printStats()
        print 'Total false pairs: ', totalFalsePairs
        #    stats.writeStats(statOut)
        stats.saveStats(statOut, keyPrefix=keyPrefix)
        return stats
Пример #3
0
  def computeStats(self, absList, statOut=None, errorOut=None):
    """ Compute RPF stats for detected quantities in a list of abstracts.

        For every entity type in self.entityTypes and every token for
        which self.isImportantNumber() holds, the token is counted as
          TP when it has both the label and the annotation for the type,
          FP when it has only the label,
          FN when it has only the annotation.
        Each counted token is also written to errorOut.

        absList  -- abstracts exposing .id and .sentences
        statOut  -- when not None, passed to stats.saveStats() with the
                    key prefix 'NF - '
        errorOut -- object with write(); it is used unconditionally, so
                    the None default is not a usable value

        Returns the EntityStats object holding the counts.
        """
    stats = EntityStats(self.entityTypes)
    for abstract in absList:
      errorOut.write('---'+abstract.id+'---\n')
      for sentence in abstract.sentences:
        # count correct and incorrect labelings
        for eType in self.entityTypes:
          for token in sentence:
            if not self.isImportantNumber(token):
              continue

            # Evaluate each predicate once and reuse the results below.
            labeled = token.hasLabel(eType)
            annotated = token.hasAnnotation(eType)
            if labeled and annotated:
              stats.irstats[eType].incTP()
              errorOut.write('+TP: ' + token.text + ' ('+eType+')\n')
            elif labeled:
              stats.irstats[eType].incFP()
              errorOut.write('-FP: ' + token.text + ' ('+eType+')\n')
            elif annotated:
              stats.irstats[eType].incFN()
              errorOut.write('-FN: ' + token.text + ' ('+eType+')\n')

    stats.printStats()
    if statOut is not None:
      stats.saveStats(statOut, keyPrefix='NF - ')

    return stats
Пример #4
0
    def computeStatsOld(self, absList, statOut=None, errorOut=None, typeList=[]):
        """ Compute RPF stats for associated mentions and quantities in a list
            of abstracts.

            For each association in self.associationList, the text event
            rate and the outcome number are validated with
            self.checkQuantity. No error messages means a TP. Any error
            message means an FP, which is also counted as an FN when the
            quantities are true positives that should share an outcome
            measurement.

            absList  -- abstracts to report on; an abstract with no entry in
                        self.associationList gets a header line and has its
                        incomplete matches processed
            statOut  -- target of statOut.copy(self.statList) and of
                        stats.saveStats(). It is used unconditionally, so
                        the None default is not a usable value.
            errorOut -- object with write(); receives the TP/FP/FN lines
                        and is also used unconditionally
            typeList -- not used by this method

            Returns None.
        """
        statOut.copy(self.statList)

        # how many of the associations are correct/incorrect?
        statDescription = '(G,O) - OM'
        stats = EntityStats([statDescription])

        # Iterate over a snapshot of the keys.
        for abstract in list(self.associationList):
            errorOut.write('---%s ---\n'%(abstract.id))

            for omAssociation in self.associationList[abstract]:
                group = omAssociation.group
                outcome = omAssociation.outcome
                om = omAssociation.outcomeMeasurement

                er = om.getTextEventRate()
                on = om.getOutcomeNumber()

                # checkQuantity reports problems by appending to errorMsgs.
                errorMsgs = []
                self.checkQuantity(er, group, outcome, errorMsgs)
                self.checkQuantity(on, group, outcome, errorMsgs)

                if errorMsgs:
                    stats.irstats[statDescription].incFP()
                    prefix = '-FP'
                    quantitiesCorrect = (
                        (er is None or er.isTruePositive())
                        and (on is None or on.isTruePositive())
                        and (er is None or on is None
                             or on.shouldBelongToSameOutcomeMeasurement(er)))
                    if quantitiesCorrect:
                        # this outcome measurement is correct and should be
                        # associated: a false negative as well as a false
                        # positive
                        stats.irstats[statDescription].incFN()
                        errorOut.write('-FN: %s\n'%(omAssociation.getValueString()))
                else:
                    stats.irstats[statDescription].incTP()
                    prefix = '+TP'

                errorOut.write('%s: %s\n'%(prefix, omAssociation.toString()))
                if errorMsgs:
                    errorOut.write(', '.join(errorMsgs)+'\n')

            # look at those OMs that were not associated
            if abstract in self.incompleteMatches:
                self.__processIncompleteMatches(abstract, errorOut, stats, statDescription)

        # Report abstracts for which no association was recorded at all.
        for abstract in absList:
            # Membership is checked on the mapping directly instead of
            # scanning a key list for every abstract.
            if abstract in self.associationList:
                continue
            errorOut.write('---%s ---\n'%(abstract.id))
            if abstract in self.incompleteMatches:
                self.__processIncompleteMatches(abstract, errorOut, stats, statDescription)

        stats.saveStats(statOut, keyPrefix='Assoc ')
Пример #5
0
    def computeStats(self,
                     absList,
                     statOut=None,
                     errorOut=None,
                     typeList=[],
                     keyPrefix=''):
        """ compute RPF stats for associated mentions and quantities in a list
            of abstracts.

            write final RPF stats to statOut
            write TP/FP/FN to errorOut
        """
        if len(keyPrefix) == 0:
            keyPrefix = 'Assoc - '

        if len(typeList) > 0:
            statDescription = '-'.join(typeList)
        else:
            statDescription = self.entityTypesString

        stats = EntityStats([statDescription])

        totalFalsePairs = 0
        for abs in absList:
            errorOut.write('---%s (%s)\n' % (abs.id, statDescription))
            for s in abs.sentences:
                [tp, fp, fn,
                 falsePairs] = self.checkAssociations(s, errorOut, typeList)
                errorOut.write('tp: %d, fp: %d, fn: %d, falsePairs: %d\n' %
                               (tp, fp, fn, falsePairs))
                totalFalsePairs += falsePairs
                stats.irstats[statDescription].addTP(tp)
                stats.irstats[statDescription].addFP(fp)
                stats.irstats[statDescription].addFN(fn)
        stats.printStats()
        print 'Total false pairs: ', totalFalsePairs
        #    stats.writeStats(statOut)
        stats.saveStats(statOut, keyPrefix=keyPrefix)
        return stats
Пример #6
0
  def computeStats(self, absList, statOut=None, errorOut=None):
    """ Compute RPF stats for detected mentions in a list of abstracts.

        All sentences returned by abstract.allSentences() are visited.
        A sentence that is also in abstract.sentences is scored with
        self.compareAnnotatedAndDetected for every entity type. For any
        other sentence only getAnnotatedMentions(...,
        recomputeMentions=True) is called and its result is discarded.

        absList  -- abstracts exposing .id, .sentences and allSentences()
        statOut  -- when not None, passed to stats.saveStats() with the
                    key prefix 'MF - '
        errorOut -- object with write(); receives one header per abstract
                    and is passed on to compareAnnotatedAndDetected

        Returns the EntityStats object holding the counts.
        """
    stats = EntityStats(self.entityTypes)

    for abstract in absList:
      header = '---' + abstract.id + '---\n'
      errorOut.write(header)

      for sentence in abstract.allSentences():
        for entityType in self.entityTypes:
          if sentence not in abstract.sentences:
            # Not a sentence being scored: only recompute its annotated
            # mentions; the returned list is not used here.
            sentence.getAnnotatedMentions(entityType, recomputeMentions=True)
            continue

          self.compareAnnotatedAndDetected(sentence, entityType,
                                           stats.irstats[entityType],
                                           errorOut)

    stats.printStats()
    if statOut != None:
      stats.saveStats(statOut, keyPrefix='MF - ')

    return stats
Пример #7
0
    def computeStats(self, absList, statOut=None, errorOut=None, typeList=[]):
        """ Compute RPF stats for associated outcome measurements and group
            and outcome mentions in a list of abstracts.

            For every abstract the true associations are obtained from
            self.buildTrueAssociations() and dumped to the file
            'trueassociations.<finderType>.txt' (relative path, opened for
            writing). Each association found for the abstract is compared
            with the first still-unmatched true association that shares its
            event rate or outcome number:
              TP -- self.checkQuantity reports no errors for both
                    quantities; the true association is marked as matched
              FP -- no candidate exists or the check reported errors
            True associations left unmatched are counted as FN.

            absList  -- abstracts to evaluate; each exposes .id
            statOut  -- receives self.statList through statOut.copy() and
                        the final stats through stats.saveStats(). It is
                        used unconditionally, so the None default is not
                        a usable value.
            errorOut -- object with write(); receives TP/FP/FN lines and is
                        also used unconditionally
            typeList -- not used by this method

            Returns None.
        """
        statOut.copy(self.statList)

        # how many of the associations are correct/incorrect?
        statDescription = '(G,O) - OM'
        stats = EntityStats([statDescription])
        taFile = open('trueassociations.%s.txt' % self.finderType, 'w')
        # try/finally guarantees the dump file is closed even when one of
        # the project calls below raises.
        try:
            for abstract in absList:
                errorOut.write('---%s ---\n' % (abstract.id))
                trueAssociations = self.buildTrueAssociations(abstract)

                taFile.write('---%s ---\n' % (abstract.id))
                for ta in trueAssociations:
                    taFile.write(ta.toString() + '\n')

                if abstract in self.associationList:
                    for omAssociation in self.associationList[abstract]:
                        measurement = omAssociation.outcomeMeasurement
                        group = measurement.getGroup()
                        outcome = measurement.getOutcome()
                        er = measurement.getTextEventRate()
                        on = measurement.getOutcomeNumber()

                        errorMsgs = []
                        matchFound = False
                        for ta in trueAssociations:
                            if ta.match is not None:
                                continue
                            sharesQuantity = (
                                (er is not None and er == ta.eventrate) or
                                (on is not None and on == ta.outcomeNumber))
                            if not sharesQuantity:
                                continue

                            # we have a potential match. check to see if
                            # everything matches
                            errorMsgs = []
                            self.checkQuantity(er, group, outcome, errorMsgs)
                            self.checkQuantity(on, group, outcome, errorMsgs)
                            if not errorMsgs:
                                matchFound = True
                                ta.match = omAssociation

                            # Only the first candidate is considered,
                            # whether or not it passed the check.
                            break

                        if matchFound:
                            stats.irstats[statDescription].incTP()
                            prefix = '+TP'
                        else:
                            # we have a false positive
                            stats.irstats[statDescription].incFP()
                            prefix = '-FP'

                        errorOut.write('%s: %s\n' %
                                       (prefix, omAssociation.toString()))
                        if errorMsgs:
                            errorOut.write(', '.join(errorMsgs) + '\n')

                # look at those OMs that were not associated
                if abstract in self.incompleteMatches:
                    self.__processIncompleteMatches(abstract, errorOut)

                # count false negatives
                for ta in trueAssociations:
                    if ta.match is None:
                        stats.irstats[statDescription].incFN()
                        errorOut.write('-FN: %s\n' % (ta.toString()))
        finally:
            taFile.close()

        stats.saveStats(statOut, keyPrefix='Assoc ')
Пример #8
0
    def computeStats(self, absList, statOut=None, errorOut=None, typeList=[]):
        """ Compute RPF stats for associated outcome measurements and group
            and outcome mentions in a list of abstracts.

            The true associations of each abstract come from
            self.buildTrueAssociations() and are written to
            'trueassociations.<finderType>.txt' (relative path, opened for
            writing). A found association is a TP when the first unmatched
            true association sharing its event rate or outcome number
            exists and self.checkQuantity reports no errors for either
            quantity; otherwise it is an FP. Every true association that
            stays unmatched is an FN.

            absList  -- abstracts to evaluate; each exposes .id
            statOut  -- target of statOut.copy(self.statList) and of
                        stats.saveStats(). It is used unconditionally, so
                        the None default is not a usable value.
            errorOut -- object with write(); receives TP/FP/FN lines and is
                        also used unconditionally
            typeList -- not used by this method

            Returns None.
        """
        statOut.copy(self.statList)

        # how many of the associations are correct/incorrect?
        statDescription = '(G,O) - OM'
        stats = EntityStats([statDescription])
        taFile = open('trueassociations.%s.txt'%self.finderType, 'w')
        # Close the dump file on every exit path, including exceptions
        # raised while an abstract is being processed.
        try:
            for abstract in absList:
                errorOut.write('---%s ---\n'%(abstract.id))
                trueAssociations = self.buildTrueAssociations(abstract)

                taFile.write('---%s ---\n'%(abstract.id))
                for ta in trueAssociations:
                    taFile.write(ta.toString() + '\n')

                if abstract in self.associationList:
                    for omAssociation in self.associationList[abstract]:
                        om = omAssociation.outcomeMeasurement
                        group = om.getGroup()
                        outcome = om.getOutcome()
                        er = om.getTextEventRate()
                        on = om.getOutcomeNumber()

                        errorMsgs = []
                        matchFound = False
                        for ta in trueAssociations:
                            erMatches = er is not None and er == ta.eventrate
                            if ta.match is None and (
                                    erMatches or
                                    (on is not None and on == ta.outcomeNumber)):
                                # we have a potential match. check to see if
                                # everything matches
                                errorMsgs = []
                                self.checkQuantity(er, group, outcome, errorMsgs)
                                self.checkQuantity(on, group, outcome, errorMsgs)
                                if not errorMsgs:
                                    matchFound = True
                                    ta.match = omAssociation

                                # Stop at the first candidate regardless of
                                # the outcome of the check.
                                break

                        if matchFound:
                            stats.irstats[statDescription].incTP()
                            prefix = '+TP'
                        else:
                            # we have a false positive
                            stats.irstats[statDescription].incFP()
                            prefix = '-FP'

                        errorOut.write('%s: %s\n'%(prefix, omAssociation.toString()))
                        if errorMsgs:
                            errorOut.write(', '.join(errorMsgs)+'\n')

                # look at those OMs that were not associated
                if abstract in self.incompleteMatches:
                    self.__processIncompleteMatches(abstract, errorOut)

                # count false negatives
                for ta in trueAssociations:
                    if ta.match is None:
                        stats.irstats[statDescription].incFN()
                        errorOut.write('-FN: %s\n'%(ta.toString()))
        finally:
            taFile.close()

        stats.saveStats(statOut, keyPrefix='Assoc ')