def linkTemplates(self, sentence):
    """ Pair event rates with outcome numbers and build outcome measurements.

        Each 'eventrate' template of the sentence is combined with the first
        'on' (outcome number) template whose
        shouldBelongToSameOutcomeMeasurement(er) is true. Event rates whose
        outcomeNumber is still None, and outcome numbers whose textEventrate
        is still None, each get an outcome measurement of their own.
        The resulting list is handed to
        sentence.templates.addOutcomeMeasurementList().

        It is assumed that mention clustering has not occurred yet.

        sentence -- object exposing a `templates` collection with
                    getList(kind) and addOutcomeMeasurementList(list).
        """
    templates = sentence.templates
    onList = templates.getList('on')
    erList = templates.getList('eventrate')

    omList = []
    unmatchedER = []
    for er in erList:
        for on in onList:
            if on.shouldBelongToSameOutcomeMeasurement(er):
                om = OutcomeMeasurement(on)
                om.addEventRate(er)
                omList.append(om)
                # only the first compatible outcome number is used
                break

        # NOTE(review): presumably addEventRate() fills in er.outcomeNumber;
        # the check below relies on that -- confirm in OutcomeMeasurement.
        if er.outcomeNumber is None:
            unmatchedER.append(er)

    # outcome numbers that were not paired with any event rate
    unmatchedON = [on for on in onList if on.textEventrate is None]

    # The link state is re-checked right before each lone measurement is
    # created, exactly as the original two-phase code did.
    for er in unmatchedER:
        if er.outcomeNumber is None:
            # event rate still not matched, create outcome measurement just for it
            omList.append(OutcomeMeasurement(er))

    for on in unmatchedON:
        if on.textEventrate is None:
            # outcome number still not matched, create outcome measurement just for it
            omList.append(OutcomeMeasurement(on))

    sentence.templates.addOutcomeMeasurementList(omList)
Exemplo n.º 2
0
  def computeTrueStats(self, abstract):
    """ Compute summary statistics for an abstract from its annotated templates.

        All result containers on self (outcomeNumbers, eventRates, stats,
        unmatchedMeasurements, groupsById, outcomesById, timesById) are reset
        first, so stale entries from a previous call cannot leak in.

        For every sentence in abstract.sentences the method:
          1. registers annotated time, group and outcome templates by their
             annotated id, merging templates that share an id (outcomes
             without an id are reported and skipped),
          2. links each group size to its group and time via the 'gs'
             annotation attributes,
          3. creates an OutcomeMeasurement for each outcome number, and
          4. adds each event rate to the measurement of the same outcome that
             has the same group, time and compare-set id, or creates a new
             measurement when there is none.

        Afterwards, measurements of the same outcome are paired into
        SummaryStat objects (appended to self.stats) when they share compare
        set and time, are both complete, and belong to different groups.
        Measurements left unused are stored in self.unmatchedMeasurements and
        appended to their outcome's unusedNumbers list.

        abstract -- object exposing `id` and `sentences`; each sentence
                    exposes `annotatedTemplates` and toString().
    """
    self.outcomeNumbers = []
    self.eventRates = []
    self.stats = []
    # The original never reset this list although it appends to it below.
    self.unmatchedMeasurements = []
    self.trueStats = True
    self.abstract = abstract

    self.groupsById = {}
    self.outcomesById = {}
    self.timesById = {}

    # outcome id -> list of outcome measurements for that outcome
    omHash = {}

    for s in abstract.sentences:
      # find all of the annotated templates in the sentence
      templates = s.annotatedTemplates
      gList = templates.getList('group')
      oList = templates.getList('outcome')
      gsList = templates.getList('gs')
      onList = templates.getList('on')
      erList = templates.getList('eventrate')
      tList = templates.getList('time')

      # register times, then groups; templates sharing an id are merged
      for registry, mentions in ((self.timesById, tList),
                                 (self.groupsById, gList)):
        for mention in mentions:
          mentionId = mention.getAnnotatedId()
          if mentionId in registry:
            registry[mentionId].merge(mention)
          else:
            registry[mentionId] = mention

      for outcome in oList:
        outcomeId = outcome.getAnnotatedId()
        if outcomeId is not None and len(outcomeId) > 0:
          if outcomeId in self.outcomesById:
            self.outcomesById[outcomeId].merge(outcome)
          else:
            self.outcomesById[outcomeId] = outcome
        else:
          # single-argument print(): same text on Python 2, valid on Python 3
          print('%s %s does not have an ID. Not using it for summary stats.'
                % (abstract.id, outcome.name))

      # link groups and their sizes
      for gs in gsList:
        gid = gs.token.getAnnotationAttribute('gs', 'group')
        if gid in self.groupsById:
          g = self.groupsById[gid]
          gs.group = g
          g.addSize(gs)
        tid = gs.token.getAnnotationAttribute('gs', 'time')
        if tid in self.timesById:
          gs.time = self.timesById[tid]

      # link all relevant information needed for each outcome measurement
      for on in onList:
        gid = on.token.getAnnotationAttribute('on', 'group')
        oid = on.token.getAnnotationAttribute('on', 'outcome')
        tid = on.token.getAnnotationAttribute('on', 'time')

        if oid not in self.outcomesById:
          print('%s ??? Outcome number %s does not have a matching outcome with id = %s'
                % (abstract.id, on.value, oid))
          continue

        om = OutcomeMeasurement(on)
        om.addGroup(self.groupsById.get(gid, None))
        om.addOutcome(self.outcomesById[oid])
        om.addTime(self.timesById.get(tid, None))
        omHash.setdefault(oid, []).append(om)

      for er in erList:
        gid = er.token.getAnnotationAttribute('eventrate', 'group')
        oid = er.token.getAnnotationAttribute('eventrate', 'outcome')
        tid = er.token.getAnnotationAttribute('eventrate', 'time')
        csID = er.token.getAnnotationAttribute('eventrate', 'compareSet')

        if oid not in self.outcomesById:
          # two spaces after "abstract" reproduce the original output
          print('Event rate missing outcome annotation in abstract  %s : %s'
                % (abstract.id, s.toString()))
          er.write(sys.stdout)
          continue

        oTemplate = self.outcomesById[oid]
        gTemplate = self.groupsById.get(gid, None)
        tTemplate = self.timesById.get(tid, None)

        measurements = omHash.setdefault(oid, [])
        for om in measurements:
          if (om.getGroup() == gTemplate and om.getTime() == tTemplate
              and om.getCompareSetID() == csID):
            # same group, time and compare set: attach to this measurement
            om.addEventRate(er)
            break
        else:
          # event rate not added to existing outcome measurement, create new measurement
          om = OutcomeMeasurement(er)
          om.addGroup(gTemplate)
          om.addOutcome(oTemplate)
          om.addTime(tTemplate)
          measurements.append(om)

    # pair up measurements of the same outcome into summary statistics
    for omList in omHash.values():
      for i, om1 in enumerate(omList):
        csID1 = om1.getCompareSetID()
        for om2 in omList[i + 1:]:
          csID2 = om2.getCompareSetID()
          if (csID1 == csID2 and om1.isComplete() and om2.isComplete()
              and om1.getGroup() != om2.getGroup()
              and om1.getTime() == om2.getTime()):
            self.stats.append(SummaryStat(om1, om2, useAnnotated=True))
            om1.used = True
            om2.used = True
        # om1 may have been marked used earlier, as the second half of a pair
        if not om1.used:
          self.unmatchedMeasurements.append(om1)

    for om in self.unmatchedMeasurements:
      outcome = om.getOutcome()
      if outcome is not None:
        outcome.unusedNumbers.append(om)
    def linkTemplates(self, sentence):
        """ Build one outcome measurement per (group, outcome) entity pair.

        Every 'eventrate' and 'on' (outcome number) template of the sentence
        that already has both a group and an outcome assigned is turned into
        an OutcomeMeasurement, keyed by the root mentions of that group and
        outcome. Outcome numbers are only considered when getGroupSize() > 0,
        i.e. when an event rate can be computed from them.

        When a pair already has a measurement:
          * an outcome number is merged into it if the measurement holds only
            a text event rate and on.equivalentEventRates() accepts that rate;
          * otherwise self.closestValue() decides which value keeps the pair.
            The losing measurement is appended to
            self.incompleteMatches[abstract] as an
            OutcomeMeasurementAssociation; if closestValue() returns None
            (a tie) the pair is discarded for the rest of the sentence.

        Surviving measurements are passed to
        self.linkOutcomeMeasurementAssociations() and then registered with
        sentence.templates.addOutcomeMeasurementList().

        sentence -- object exposing `templates` and `abstract`.
        """
        templates = sentence.templates
        onList = templates.getList('on')
        erList = templates.getList('eventrate')

        abstract = sentence.abstract
        if abstract not in self.incompleteMatches:
            self.incompleteMatches[abstract] = []

        def detachMentions(value):
            # Remember the mentions matched with the value in feature vectors
            # (to be retrieved in linkQuantityAndMention(), per the original
            # author's note), then clear the direct links on the value.
            # Returns the (group entity, outcome entity) key.
            for mention in (value.group, value.outcome):
                fv = FeatureVector(-1, -1, None)
                fv.mTemplate = mention
                fv.qTemplate = value
                value.addMatchFeatures(fv)
            key = (value.group.rootMention(), value.outcome.rootMention())
            value.group = None
            value.outcome = None
            return key

        def recordIncomplete(key, om):
            # Keep track of a measurement that lost its (group, outcome) pair.
            self.incompleteMatches[abstract].append(
                OutcomeMeasurementAssociation(key[0], key[1], om, 0))

        # (group entity, outcome entity) -> measurement, or None once discarded
        omHash = {}

        for er in erList:
            if er.group is None or er.outcome is None:
                continue

            key = detachMentions(er)
            om = OutcomeMeasurement(er)

            if key not in omHash:
                omHash[key] = om
                continue

            currentOM = omHash[key]
            if currentOM is None:
                # pair was discarded after an earlier tie
                continue

            # There is already an outcome measurement for this group, outcome;
            # check if this one is closer.
            # NOTE(review): per the original comments closestValue() compares
            # distance to the closest mention, then total distance, then
            # position in the sentence -- confirm against its implementation.
            closest = self.closestValue(er, currentOM.getTextEventRate())
            if closest is None:
                # both same distance, discard both
                omHash[key] = None
            elif closest == er:
                omHash[key] = om
                recordIncomplete(key, currentOM)
            else:
                recordIncomplete(key, om)

        for on in onList:
            if on.group is None or on.outcome is None:
                continue

            key = detachMentions(on)

            # check if this ON is useful, can we compute an event rate with it?
            gs = on.getGroupSize()
            if not (gs > 0):
                continue

            om = OutcomeMeasurement(on)

            if key not in omHash:
                omHash[key] = om
                continue

            currentOM = omHash[key]
            if currentOM is None:
                # pair was discarded after an earlier tie
                continue

            currentON = currentOM.getOutcomeNumber()
            currentER = currentOM.getTextEventRate()

            # check if this on should be merged with an event rate
            if (currentON is None and currentER is not None
                    and on.equivalentEventRates(currentER.eventRate())):
                currentOM.addOutcomeNumber(on)
                continue

            # on not compatible with existing value; pick the existing value
            # that represents the current measurement
            if currentON is not None and currentER is not None:
                incumbent = self.closestValue(currentON, currentER)
                if incumbent is None:
                    # if both the same distance, just use the ER
                    incumbent = currentER
            elif currentON is not None:
                incumbent = currentON
            else:
                incumbent = currentER

            # is the new outcome number closer than the existing value?
            closest = self.closestValue(incumbent, on)
            if closest is None:
                # both same distance, discard both
                omHash[key] = None
            elif closest == on:
                omHash[key] = om
                recordIncomplete(key, currentOM)
            else:
                recordIncomplete(key, om)

        omList = []
        for (group, outcome), om in omHash.items():
            if om is not None:
                self.linkOutcomeMeasurementAssociations(
                    om, group, outcome, 0.5)
                omList.append(om)

        sentence.templates.addOutcomeMeasurementList(omList)