示例#1
0
  def __init__(self, abstract, useAnnotated=False, useTrialReports=True):
    """ Create the lists of groups and common medical conditions given
        an abstract object.

        abstract = object providing the `entities` and `annotatedEntities`
                   collections the templates are read from
        useAnnotated = if true, read templates from `abstract.annotatedEntities`
                       (merged with createAnnotatedMergedList); otherwise read
                       them from `abstract.entities` (merged with
                       createMergedList)
        useTrialReports = flag stored on the instance and forwarded to
                          AgeInfo and Gender
        """
    self.abstract = abstract
    self.useTrialReports = useTrialReports
    # counters of true (annotated) items, initialised to zero
    self.nTrueGroups = 0
    self.nTrueConditions = 0
    self.nTrueGroupSizes = 0

    # pick the entity collection and the matching merge function once,
    # so both modes share the same template-building code below
    if useAnnotated:
      entityLists = abstract.annotatedEntities
      buildMergedList = createAnnotatedMergedList
    else:
      entityLists = abstract.entities
      buildMergedList = createMergedList

    self.groupTemplates = entityLists.getList('group')
    self.ageTemplates = buildMergedList(abstract, 'age')
    self.conditionTemplates = entityLists.getList('condition')
    populationTemplates = buildMergedList(abstract, 'population')

    self.ageInfo = AgeInfo(self.ageTemplates, abstract, self.useTrialReports)
    # Gender is given the complete population list; the filtering below
    # only affects self.populationTemplates
    self.gender = Gender(populationTemplates, abstract, self.useTrialReports)

    # filter useless population terms: keep only the informative ones
    self.populationTemplates = [pTemplate for pTemplate in populationTemplates
                                if pTemplate.isInteresting() > 0]
示例#2
0
  def computeStatistics(self, errorOut):
    """ Count RPF statistics for the AGE, CONDITION and GROUP entities and
        for the GROUP SIZE values of this abstract, comparing the templates
        held by this object against the annotated ones.

        Side effects: updates self.nTrueConditions, self.nTrueGroups and
        self.nTrueGroupSizes, and marks the groupSizeEvaluation of every
        group template that has a non-zero size as correct or incorrect.

        errorOut = file stream for TPs, FPs, FNs

        return hash of IRstats, one for each mention type, keyed by mention
        type ('age', 'condition', 'group', 'group size')
        """
    stats = {}
    self.nTrueGroupSizes = 0

    aAgeTemplates = createAnnotatedMergedList(self.abstract, 'age')
    errorOut.write('age:\n')
    stats['age'] = self.ageInfo.countAgeMatches(aAgeTemplates, errorOut)

    errorOut.write('condition:\n')
    aConditionTemplates = self.abstract.annotatedEntities.getList('condition')
    stats['condition'] = countMatches(aConditionTemplates,
                                      self.conditionTemplates, errorOut)

    errorOut.write('group:\n')
    aGroupTemplates = self.abstract.annotatedEntities.getList('group')
    stats['group'] = countMatches(aGroupTemplates, self.groupTemplates, errorOut)

    self.nTrueConditions = len(aConditionTemplates)
    self.nTrueGroups = len(aGroupTemplates)

    errorOut.write('group size:\n')
    gsStats = IRstats()
    # annotated group templates whose size was correctly reproduced
    gsFound = set()
    for gTemplate in self.groupTemplates:
      gSize = gTemplate.getSize(maxSize=True)
      if gSize == 0:
        # a size of zero is not evaluated
        continue

      # look for group size match in sizes for annotated group
      matched = gTemplate.matchedTemplate
      found = matched is not None and any(
          gSize == trueGSize.value for trueGSize in matched.sizes)

      if found:
        # group size is correct
        gsStats.incTP()
        errorOut.write('  +TP: %s size = %d\n' % (gTemplate.name, gSize))
        gTemplate.groupSizeEvaluation.markCorrect()
        gsFound.add(matched)
      else:
        # group size is incorrect
        gsStats.incFP()
        errorOut.write('  -FP: %s size = %d\n' % (gTemplate.name, gSize))
        gTemplate.groupSizeEvaluation.markIncorrect()

    # look for false negatives: annotated groups that have a matched
    # template and a positive size, but whose size was not found above
    for trueTemplate in aGroupTemplates:
      if trueTemplate in gsFound or trueTemplate.matchedTemplate is None:
        continue
      trueSize = trueTemplate.getSize()
      if trueSize > 0:
        # there should be a group size for this group
        gsStats.incFN()
        errorOut.write('  -FN: %s size = %d\n' % (trueTemplate.name, trueSize))

    stats['group size'] = gsStats
    self.nTrueGroupSizes = gsStats.tp + gsStats.fn

    # NOTE: population statistics are currently not computed. The disabled
    # code for them filtered the annotated merged 'population' list with
    # isInteresting() > 0 and compared it with self.populationTemplates
    # via countMatches.
    return stats