Example #1
def insertInteraction(sentence, interaction):
    interactions = sentence.findall("interaction")
    newIdNumber = IDUtils.getNextFreeId(interactions)
    interaction.set("id", sentence.get("id") + ".i" + str(newIdNumber))
    
    # insert into sentence
    inserted = False
    for i in range(len(sentence)):
        if sentence[i].tag == "sentenceanalyses":
            sentence.insert(i, interaction)
            inserted = True
    assert inserted
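Every snippet on this page relies on IDUtils.getNextFreeId to pick the numeric suffix for a newly created element id. The helper itself is not shown here; judging from how its result is used above (sentence.get("id") + ".i" + str(newIdNumber)), it presumably inspects the trailing number of the existing ".eN" / ".iN" ids and returns the next free one. A minimal sketch under that assumption, not the actual IDUtils implementation:

def getNextFreeId(elements):
    # Assumed behaviour, inferred from the call sites on this page:
    # ids end in ".e<N>" or ".i<N>"; return the smallest N not yet in use.
    used = set()
    for element in elements:
        elementId = element.get("id")
        if elementId == None:
            continue
        suffix = elementId.rsplit(".", 1)[-1]  # e.g. "e12" or "i3"
        digits = suffix.lstrip("ei")
        if digits.isdigit():
            used.add(int(digits))
    nextId = 0
    while nextId in used:
        nextId += 1
    return nextId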
Example #3
def process(sentenceObject, examplesBySentence, classSet, classIds, predictionsByExample):
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    entityElements = sentenceElement.findall("entity")
    # remove non-name entities
    if entityElements != None:
        for entityElement in entityElements:
            if entityElement.get("isName") == "False": # interaction word
                sentenceElement.remove(entityElement)

    # add new pairs
    entityElements = sentenceElement.findall("entity")
    entityCount = IDUtils.getNextFreeId(entityElements)
    
    if examplesBySentence.has_key(sentenceId):
        # split merged examples
        for example in examplesBySentence[sentenceId][:]:
            prediction = predictionsByExample[example[0]]
            if classSet.getName(prediction[0]).find("---") != -1:
                nameSplits = classSet.getName(prediction[0]).split("---")
                prediction[0] = classSet.getId(nameSplits[0], False)
                count = 1
                for nameSplit in nameSplits[1:]:
                    newExample = example[:]
                    newExample[0] += ".dupl" + str(count)
                    examplesBySentence[sentenceId].append(newExample)
                    newPrediction = prediction[:]
                    newPrediction[0] = classSet.getId(nameSplit, False)
                    predictionsByExample[newExample[0]] = newPrediction
                    count += 1
        
        # remove negatives
        examplesToKeep = []
        for example in examplesBySentence[sentenceId]:
            prediction = predictionsByExample[example[0]]
            if prediction[0] != 1:
                examplesToKeep.append(example)
        examplesBySentence[sentenceId] = examplesToKeep
        
        map = {}
        for token in sentenceObject.tokens:
            map[token.get("id")] = {}
        addExistingEntities(map, entityElements, sentenceObject)
        addExamples(map, examplesBySentence[sentenceId])
        markFinal(map)
        entities = buildEntityNodes(map, sentenceObject, entityCount, classSet, classIds, predictionsByExample)
        interactions = buildInteractions(map, sentenceObject.sentence, predictionsByExample)
        for entity in entities:
            sentenceElement.append(entity)
        for interaction in interactions:
            sentenceElement.append(interaction)

#ENDIF
Example #4
    def writeXMLSentence(self,
                         examples,
                         predictionsByExample,
                         sentenceObject,
                         classSet,
                         classIds,
                         goldSentence=None):
        self.assertSameSentence(examples)

        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        sentenceText = sentenceElement.get("text")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement,
                                           ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(
            sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)

        # add new pairs
        for example in examples:
            prediction = predictionsByExample[example[0]]
            entityElement = ET.Element("entity")
            entityElement.attrib["isName"] = "False"
            headToken = example[3]["t"]
            for token in sentenceObject.tokens:
                if token.get("id") == headToken:
                    headToken = token
                    break
            entityElement.set("charOffset", example[3]["charOffset"])
            entityElement.set("headOffset", headToken.get("charOffset"))
            entityElement.set("phraseType", example[3]["ptype"])
            entOffset = Range.charOffsetToSingleTuple(example[3]["charOffset"])
            entityElement.set("text",
                              sentenceText[entOffset[0]:entOffset[1] + 1])
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            self.setElementType(entityElement, prediction, classSet, classIds)
            newEntityIdCount += 1
            sentenceElement.append(entityElement)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
Example #5
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None):        
        self.assertSameSentence(examples)
        
        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        sentenceText = sentenceElement.get("text")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)
            
        # add new pairs
        for example in examples:
            prediction = predictionsByExample[example[0]]
            entityElement = ET.Element("entity")
            entityElement.attrib["isName"] = "False"
            headToken = example[3]["t"]
            for token in sentenceObject.tokens:
                if token.get("id") == headToken:
                    headToken = token
                    break
            entityElement.set("charOffset", example[3]["charOffset"]) 
            entityElement.set("headOffset", headToken.get("charOffset"))
            entityElement.set("phraseType", example[3]["ptype"])
            entOffset = Range.charOffsetToSingleTuple(example[3]["charOffset"])
            entityElement.set("text", sentenceText[entOffset[0]:entOffset[1]+1])
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            self.setElementType(entityElement, prediction, classSet, classIds)
            newEntityIdCount += 1
            sentenceElement.append(entityElement)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
Example #6
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds):
        self.assertSameSentence(examples)

        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        self.removeNonNameEntities(sentenceElement)

        entityByTokenByType = {}
        # First add existing entities (names) (use sentenceElement, as sentenceObject still has all entities)
        for entity in sentenceElement.findall("entity"):
            headOffset = entity.get("headOffset")
            headToken = None
            for token in sentenceObject.tokens:
                if token.get("charOffset") == headOffset:
                    headToken = token
                    break
            assert headToken != None
            headTokenId = headToken.get("id")
            if not entityByTokenByType.has_key(headTokenId):
                entityByTokenByType[headTokenId] = {}
            entityByTokenByType[headTokenId][entity.get("type")] = entity

        # Then add entities defined by examples
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        for example in examples:
            prediction = predictionsByExample[example[0]]
            if prediction[0] == 1:
                continue

            headTokenId = example[3]["t1"]
            if not entityByTokenByType.has_key(headTokenId):
                entityByTokenByType[headTokenId] = {}
            e1Type = classSet.getName(prediction[0])
            if e1Type == "Cause":
                continue

            # Maximum of one entity per type per token
            if entityByTokenByType[headTokenId].has_key(e1Type):
                continue

            entityElement = ET.Element("entity")
            entityByTokenByType[headTokenId][e1Type] = entityElement
            entityElement.attrib["isName"] = "False"
            for token in sentenceObject.tokens:
                if token.get("id") == headTokenId:
                    headToken = token
                    break
            entityElement.attrib["charOffset"] = headToken.get("charOffset")
            entityElement.attrib["headOffset"] = headToken.get("charOffset")
            entityElement.attrib["text"] = headToken.get("text")
            entityElement.attrib["id"] = sentenceId + ".e" + str(newEntityIdCount)
            entityElement.set("type", e1Type)
            newEntityIdCount += 1
            sentenceElement.append(entityElement)

        pairCount = 0
        for example in examples:
            prediction = predictionsByExample[example[0]]
            if prediction[0] == 1:
                continue
            exampleType = classSet.getName(prediction[0])
            t1Id = example[3]["t1"]
            t2Id = example[3]["t2"]

            if exampleType != "Cause":
                if entityByTokenByType.has_key(t2Id):
                    e1Id = entityByTokenByType[t1Id][exampleType].get("id")
                    for e2Type in sorted(entityByTokenByType[t2Id].keys()):
                        if exampleType.find("egulation") == -1 and e2Type != "Protein":
                            continue
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = e1Id
                        pairElement.attrib["e2"] = entityByTokenByType[t2Id][e2Type].get("id")
                        pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                        pairElement.attrib["predictions"] = self.getEdgePredictionString(example, prediction, classSet, classIds)
                        pairElement.set("type", "Theme")
                        sentenceElement.append(pairElement)
                        pairCount += 1
            else:
                if entityByTokenByType.has_key(t1Id) and entityByTokenByType.has_key(t2Id):
                    for e1Type in sorted(entityByTokenByType[t1Id].keys()):
                        if e1Type.find("egulation") == -1:
                            continue
                        for e2Type in sorted(entityByTokenByType[t2Id].keys()):
                            pairElement = ET.Element("interaction")
                            pairElement.attrib["directed"] = "Unknown"
                            pairElement.attrib["e1"] = entityByTokenByType[t1Id][e1Type].get("id")
                            pairElement.attrib["e2"] = entityByTokenByType[t2Id][e2Type].get("id")
                            pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                            pairElement.attrib["predictions"] = self.getEdgePredictionString(example, prediction, classSet, classIds)
                            pairElement.set("type", "Cause")
                            sentenceElement.append(pairElement)
                            pairCount += 1

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
Example #7
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None):
        sentenceElement = sentenceObject.sentence
        self.sentenceId = sentenceElement.get("id")
        self.assertSameSentence(examples, self.sentenceId)
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)

        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        entities = self.removeNonNameEntities(sentenceElement)

        # filter interactions
        interactionsToKeep = []
        for interaction in interactions:
            if interaction.get("type") != "neg":
                interactionsToKeep.append(interaction)
        interactions = interactionsToKeep

        # early out
        cutoff = 100
        if len(interactions) == 0 or len(interactions) > cutoff:
            # re-attach the analyses-element
            if sentenceAnalysesElement != None:
                sentenceElement.append(sentenceAnalysesElement)
            if len(interactions) > cutoff:
                print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get(
                    "id"
                ), "has more than", cutoff, "interactions, removing all."
            return

        interactionsByEntity = {}
        interactionsById = {}
        for entity in entities:
            interactionsByEntity[entity.get("id")] = []
        for interaction in interactions:
            e1Id = interaction.get("e1")
            if not interactionsByEntity.has_key(e1Id):
                interactionsByEntity[e1Id] = []
            interactionsByEntity[e1Id].append(interaction)
            interactionsById[interaction.get("id")] = interaction

        # NOTE! Following won't work for pairs
        self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
        self.newEntities = []
        self.newInteractions = []

        # Mapping for connecting the events
        self.entitiesByHeadByType = {}
        # self.tokenByOffset = {}
        # for token in sentenceObject.tokens:
        #    self.tokenByOffset[token.get("charOffset")] = token
        #    self.entityByHeadByType[token.get("charOffset")] = {}
        for entity in sentenceObject.entities:
            # by offset
            offset = entity.get("headOffset")
            if not self.entitiesByHeadByType.has_key(offset):
                self.entitiesByHeadByType[offset] = {}
            # by type
            eType = entity.get("type")
            if entity.get("isName") != "True":
                self.entitiesByHeadByType[offset][eType] = []
            else:  # add names to structure
                if not self.entitiesByHeadByType[offset].has_key(eType):
                    self.entitiesByHeadByType[offset][eType] = []
                self.entitiesByHeadByType[offset][eType].append(entity)

        entityKeys = sentenceObject.entitiesById.keys()
        exampleByEntityId = {}
        for example in examples:
            # if predictionsByExample[example[0]][0] == 1: # negative
            #    continue
            eId = example[3]["e"]
            assert eId in entityKeys
            if not exampleByEntityId.has_key(eId):
                exampleByEntityId[eId] = []
            exampleByEntityId[eId].append(example)

        # This doesn't work, it was an attempt to include
        # only the positive example with the highest prediction strength
        #        for key in sorted(exampleByEntityId.keys()):
        #            eType = sentenceObject.entitiesById[key].get("type")
        #            eExamples = exampleByEntityId[key]
        #            if eType == "Binding" and len(eExamples) > 1:
        #                maxArgs = -1
        #                maxStr = -999999999
        #                for example in eExamples:
        #                    if predictionsByExample[example[0]][0] == 1:
        #                        continue
        #                    numArgs = example[3]["i"].count(",") + 1
        #                    if numArgs > maxArgs:
        #                        maxArgs = numArgs
        #                    predClass = predictionsByExample[example[0]][0]
        #                    predictionStrength = predictionsByExample[example[0]][predClass]
        #                    if predictionStrength > maxStr:
        #                        maxStr = predictionStrength
        #                #print maxArgs, len(eExamples)
        #                for example in eExamples:
        #                    if predictionsByExample[example[0]][0] == 1:
        #                        continue
        #                    predClass = predictionsByExample[example[0]][0]
        #                    predictionStrength = predictionsByExample[example[0]][predClass]
        #                    if predictionStrength != maxStr:
        #                        examples.remove(example)
        #                    #if example[3]["i"].count(",") + 1 < maxArgs:
        #                    #    examples.remove(example)

        # self.newEntitiesById = {}
        # self.outEdgesByEntity = {}

        # Gather arguments for the simple, one-argument events
        argumentsByExample = {}
        positiveExamples = []
        exampleIdCount = 0
        for entity in entities:
            # If no example, case is unambiguous
            if entity.get("id") not in exampleByEntityId:
                simpleEventInteractions = interactionsByEntity[entity.get("id")]
                numCauses = 0
                numThemes = 0
                for interaction in simpleEventInteractions[:]:
                    if self.isIntersentence(interaction):
                        print "Warning, intersentence interaction for", entity.get("id"), entity.get("type")
                        simpleEventInteractions.remove(interaction)
                        continue
                    if interaction.get("type") == "neg":
                        simpleEventInteractions.remove(interaction)
                        continue
                    iType = interaction.get("type")
                    if iType == "Cause":
                        numCauses += 1
                    elif iType == "Theme":
                        numThemes += 1
                eType = entity.get("type")
                assert (
                    numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType != "Binding")
                ), (numThemes, numCauses, eType, entity.get("id"), [x[0] for x in examples], entityKeys)
                # assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType == "Binding"), (numThemes,numCauses,eType,entity.get("id"))
                for interaction in simpleEventInteractions:
                    self.counts["simple-" + eType + "-" + interaction.get("type")] += 1
                    exampleId = "simple." + str(exampleIdCount)
                    exampleIdCount += 1
                    positiveExamples.append([exampleId, None, None, None])
                    argumentsByExample[exampleId] = [interaction]
                    # self.addEvent([interaction], sentenceObject, "simple")

        # Gather arguments for predicted, unmerged events
        for example in examples:
            # print predictionsByExample[example[0]]
            if predictionsByExample[example[0]][0] == 1:  # negative
                continue
            positiveExamples.append(example)
            arguments = []
            for iId in example[3]["i"].split(","):
                if iId == "":  # processes can have 0 arguments
                    assert "etype" in example[3], example[3]
                    assert example[3]["etype"] == "Process", example[3]
                    break
                arg = interactionsById[iId]
                if self.isIntersentence(arg):
                    continue
                assert arg.get("type") != "neg"
                arguments.append(arg)
            argumentsByExample[example[0]] = arguments

        # Loop until all positive examples are added. This process
        # assumes that the events (mostly) form a directed acyclic
        # graph, which can be written by "growing" the structure from
        # the "leaf" events, and consecutively adding levels of
        # nesting events.
        examplesLeft = len(positiveExamples)
        exampleAdded = {}
        for example in positiveExamples:
            exampleAdded[example[0]] = False
        forceAdd = False
        forcedCount = 0
        while examplesLeft > 0:
            if len(self.newEntities) > 100:
                print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get(
                    "id"
                ), "has generated more than", cutoff, "events, skipping the rest."
                break
            examplesAddedThisRound = 0
            # For each round, loop through the potentially remaining examples
            for example in positiveExamples:
                if len(self.newEntities) > 100:
                    break
                if exampleAdded[example[0]]:  # This event has already been inserted
                    continue
                arguments = argumentsByExample[example[0]]
                # An event can be added if all of its argument events have already
                # been added. Addition is forced if lack of argument events blocks
                # the process.
                if forceAdd or self.argumentEntitiesExist(arguments, sentenceObject):
                    umType = "complex"  # mark the root entity in the output xml
                    predictionStrength = None
                    if example[0].find("simple") != -1:
                        umType = "simple"
                    else:
                        # Prediction strength is only available for classified argument groups
                        predictionStrength = self.getPredictionStrength(
                            example, predictionsByExample, classSet, classIds
                        )
                    # print example
                    if (
                        umType != "simple"
                        and "eType" in example[3]
                        and example[3]["etype"] == "Process"
                        and len(arguments) == 0
                    ):
                        origProcess = sentenceObject.entitiesById[example[3]["e"]]
                        # Put back the original entity
                        newProcess = self.addEntity(origProcess)
                        newProcess.set("umType", umType)
                        if predictionStrength != None:
                            newProcess.set("umStrength", str(predictionStrength))
                    else:  # example has arguments
                        self.addEvent(arguments, sentenceObject, umType, forceAdd, predictionStrength)
                    exampleAdded[example[0]] = True
                    examplesLeft -= 1
                    examplesAddedThisRound += 1
                    forceAdd = False
            if examplesLeft > 0 and examplesAddedThisRound == 0:
                # If there are examples left, but nothing was added, this
                # means that some nested events are missing. Theoretically
                # this could also be because two events are referring to
                # each other, preventing each other's insertion. In any
                # case this is solved by simply forcing the addition of
                # the first non-inserted event, by creating 0-argument
                # entities for its argument events.
                forcedCount += 1
                # print "Warning, forcing event addition"
                forceAdd = True

        # Attach the new elements
        for element in self.newEntities + self.newInteractions:
            sentenceElement.append(element)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
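The while-loop above (and its reformatted copy in Example #12) writes nested events by growing the structure from the leaves: an event is emitted once all of its argument events already exist, and when a full pass makes no progress the next remaining event is forced in. Stripped of the XML and TEES-specific details, the control flow is roughly the following generic sketch; the function and callback names are hypothetical, and the 100-event cutoff of the original is omitted:

def addInDependencyOrder(items, getArguments, addItem):
    # Generic form of the loop above: an item is added once everything it
    # depends on has been added; if a full pass adds nothing, the next
    # remaining item is forced in to break the deadlock.
    added = set()
    forceAdd = False
    while len(added) < len(items):
        addedThisRound = 0
        for item in items:
            if item in added:
                continue
            if forceAdd or all(arg in added for arg in getArguments(item)):
                addItem(item, forceAdd)
                added.add(item)
                addedThisRound += 1
                forceAdd = False
        if addedThisRound == 0 and len(added) < len(items):
            forceAdd = True  # missing or cyclic arguments: force the next item
    return added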
Example #8
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds):        
        self.assertSameSentence(examples)
        
        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        self.removeChildren(sentenceElement, ["pair", "interaction"])
        
        # remove negative predicted entities
        self.removeChildren(sentenceElement, ["entity"], {"type":"neg"})
        
        # add required entities for dummy nodes with positive interactions
        dummies = {}
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        for example in examples:
            prediction = predictionsByExample[example[0]]
            #if self.isNegative(prediction, classSet):
            #    continue
            assert example[3]["d1"] in ["T","F"], ("Example d1 error:", example)
            assert example[3]["d2"] in ["T","F"], ("Example d2 error:", example)
            for node in ["1","2"]:
                d = example[3]["d"+node]
                if d == "T": # Node is a dummy node
                    e = example[3]["e"+node]
                    l = example[3]["l"+node]
                    if not dummies.has_key(e): dummies[e] = {}
                    if not dummies[e].has_key(l): # Create a real node for the empty slot
                        entityElement = ET.Element("entity")
                        entityElement.attrib["isName"] = "False"
                        headToken = example[3]["t"+node]
                        for token in sentenceObject.tokens:
                            if token.get("id") == headToken:
                                headToken = token
                                break
                        entityElement.set("charOffset", headToken.get("charOffset")) 
                        entityElement.set("headOffset", headToken.get("charOffset"))
                        entityElement.set("text", headToken.get("text"))
                        entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
                        #self.setElementType(entityElement, prediction, classSet, classIds)
                        entityElement.set("type", sentenceObject.entitiesById[e].get("type"))
                        # Add element to sentence
                        newEntityIdCount += 1
                        sentenceElement.append(entityElement)
                        newEntityId = entityElement.get("id")
                        #print "newEntityId",newEntityId
                        assert not sentenceObject.entitiesById.has_key(newEntityId)
                        sentenceObject.entitiesById[newEntityId] = entityElement
                        # Keep track of created dummies
                        dummies[e][l] = entityElement

        # select examples for correct edge combinations
        #print "DUMMIES", dummies
        #print sentenceObject.entitiesById
        examples = self.getValidExamples(examples, predictionsByExample, sentenceObject, dummies, classSet, classIds)
        
        # add interactions
        pairCount = 0
        for example in examples:
            prediction = predictionsByExample[example[0]]
            #if self.isNegative(prediction, classSet):
            #    continue
            pairElement = ET.Element("interaction")
            if example[3].has_key("discarded") and example[3]["discarded"]:
                pairElement.attrib["discarded"] = "True"
            pairElement.attrib["directed"] = "Unknown"
            if example[3]["d1"] == "F":
                pairElement.attrib["e1"] = example[3]["e1"]
            else:
                pairElement.attrib["e1"] = dummies[example[3]["e1"]][example[3]["l1"]].get("id")
            if example[3]["d2"] == "F":
                pairElement.attrib["e2"] = example[3]["e2"]
            else:
                pairElement.attrib["e2"] = dummies[example[3]["e2"]][example[3]["l2"]].get("id")
            pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
            self.setElementType(pairElement, prediction, classSet, classIds)
            sentenceElement.append(pairElement)
            pairCount += 1
  
        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
Example #9
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False):
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()

    counts = defaultdict(int)
    for document in corpusRoot.findall("document"):
        sentDict = None
        pmid = document.get("pmid")
        isPMC = False
        for sentence in document.findall("sentence"):
            counts["sentences"] += 1
            sentenceId = str(sentence.get("id")) + "/" + str(
                sentence.get("origId"))
            if verbose: print "Processing", sentenceId
            if sentDict == None:
                if sentence.get("origId") != None:
                    assert pmid == None
                    sentDict = loadEventXML(
                        eventDir + "/" + sentence.get("origId").split(".")[0] +
                        ".xml",
                        verbose=verbose)
                else:
                    #pmid = sentence.get("pmid")
                    assert pmid != None
                    if pmid.startswith("PMC"):
                        isPMC = True
                        sentDict = {}
                    else:
                        assert pmid.startswith("PMID")
                        sentDict = loadEventXML(
                            eventDir + "/" + pmid.split("-", 1)[-1] + ".xml",
                            verbose=verbose)
            interactionXMLText = sentence.get("text")
            if not sentDict.has_key(interactionXMLText):
                counts["missing-sentences"] += 1
                if isPMC: counts["missing-sentences-PMC"] += 1
                if verbose:
                    print "Missing sentence:", pmid, (sentenceId, sentDict,
                                                      sentence.get("text"))
            else:
                sentenceAnalyses = sentence.find("sentenceanalyses")
                if sentenceAnalyses != None:
                    sentence.remove(sentenceAnalyses)
                entityIdCount = IDUtils.getNextFreeId(
                    sentence.findall("entity"))
                events = sentDict[interactionXMLText]
                events.sort()
                for event in events:
                    if not keepEvent(event[2]):
                        counts["filtered-triggers"] += 1
                        continue
                    trigger = ET.Element("entity")
                    trigger.set("isName", "False")
                    trigger.set("charOffset",
                                str(event[0]) + "-" + str(event[1]))
                    trigger.set("type", str(event[2]))
                    trigger.set("text", str(event[3]))
                    trigger.set("source", "GENIA_event_annotation_0.9")
                    trigger.set("id",
                                sentence.get("id") + ".e" + str(entityIdCount))
                    entityIdCount += 1
                    counts["added-triggers"] += 1
                    sentence.append(trigger)
                if sentenceAnalyses != None:
                    sentence.append(sentenceAnalyses)

    FindHeads.findHeads(corpusTree, parse, removeExisting=False)
    removeDuplicates(corpusRoot)
    print counts

    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
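Example #9 (repeated without line wrapping as Example #13 below) is a standalone conversion step rather than a writer method: it loads a corpus XML file, adds GENIA event triggers to each matched sentence as entity elements, finds heads for the new entities, and writes the result out. A possible call, with purely hypothetical file and directory names, could look like:

corpusTree = run("devel-corpus.xml",              # hypothetical input corpus
                 "devel-corpus-triggers.xml",     # hypothetical output file
                 eventDir="/data/genia-events",   # hypothetical annotation directory
                 parse="split-mccc-preparsed",
                 verbose=False)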
Example #10
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None):        
        self.assertSameSentence(examples)
        
        extensionRequested = False
        
        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)
        
        # gold sentence elements
        goldEntityTypeByHeadOffset = {}
        goldEntityByHeadOffset = {}
        if goldSentence != None:
            for entity in goldSentence.entities:
                headOffset = entity.get("headOffset")
                if not goldEntityTypeByHeadOffset.has_key(headOffset):
                    goldEntityTypeByHeadOffset[headOffset] = []
                    goldEntityByHeadOffset[headOffset] = []
                goldEntityTypeByHeadOffset[headOffset].append(entity)
                goldEntityByHeadOffset[headOffset].append(entity)
            for key in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[key] =  self.getMergedEntityType(goldEntityTypeByHeadOffset[key])
            for token in sentenceObject.tokens:
                if not goldEntityTypeByHeadOffset.has_key(token.get("charOffset")):
                    goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"
            
        # add new pairs
        for example in examples:
            headToken = example[3]["t"]
            for token in sentenceObject.tokens:
                if token.get("id") == headToken:
                    headToken = token
                    break
            unmergeEPINeg = None
            if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
                # headToken must be resolved to a token element before its text is read
                unmergeEPINeg = headToken.get("text")
            if "trigex" in example[3] and example[3]["trigex"] == "bb":
                extensionRequested = True
            prediction = predictionsByExample[example[0]]
            predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
            for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg): # split merged classes
                entityElement = ET.Element("entity")
                entityElement.set("isName", "False")
                entityElement.set("charOffset", headToken.get("charOffset"))
                entityElement.set("headOffset", headToken.get("charOffset"))
                entityElement.set("text", headToken.get("text"))
                entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
                entityElement.set("type", eType)
                entityElement.set("predictions", predictionString)
                #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
                if self.insertWeights: # in other words, use gold types
                    headOffset = headToken.get("charOffset")
                    if goldEntityByHeadOffset.has_key(headOffset):
                        for entity in goldEntityByHeadOffset[headOffset]:
                            entity.set("predictions", entityElement.get("predictions") )
                if goldEntityTypeByHeadOffset.has_key(headToken.get("charOffset")):
                    entityElement.set("goldType", goldEntityTypeByHeadOffset[headToken.get("charOffset")])
                if "goldIds" in example[3]: # The entities for which this example was built
                    entityElement.set("goldIds", example[3]["goldIds"])
                if (entityElement.get("type") != "neg" and not goldEntityByHeadOffset.has_key(entityElement.get("headOffset"))) or not self.insertWeights:
                    newEntityIdCount += 1
                    sentenceElement.append(entityElement)
                elif entityElement.get("type") == "neg":
                    pass
                    #newEntityIdCount += 1
                    #sentenceElement.append(entityElement)
        
        # if only adding weights, re-attach interactions and gold entities
        if self.insertWeights:
            for entity in nonNameEntities:
                sentenceElement.append(entity)
            for interaction in interactions:
                sentenceElement.append(interaction)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
        
        # Extend bacteria triggers
        if extensionRequested:
            InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])
Example #11
    def writeXMLSentence(self,
                         examples,
                         predictionsByExample,
                         sentenceObject,
                         classSet,
                         classIds,
                         goldSentence=None):
        self.assertSameSentence(examples)

        extensionRequested = False

        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement,
                                           ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(
            sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)

        # gold sentence elements
        goldEntityTypeByHeadOffset = {}
        goldEntityByHeadOffset = {}
        if goldSentence != None:
            for entity in goldSentence.entities:
                headOffset = entity.get("headOffset")
                if not goldEntityTypeByHeadOffset.has_key(headOffset):
                    goldEntityTypeByHeadOffset[headOffset] = []
                    goldEntityByHeadOffset[headOffset] = []
                goldEntityTypeByHeadOffset[headOffset].append(entity)
                goldEntityByHeadOffset[headOffset].append(entity)
            for key in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[key] = self.getMergedEntityType(
                    goldEntityTypeByHeadOffset[key])
            for token in sentenceObject.tokens:
                if not goldEntityTypeByHeadOffset.has_key(
                        token.get("charOffset")):
                    goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"

        # add new pairs
        for example in examples:
            headToken = example[3]["t"]
            for token in sentenceObject.tokens:
                if token.get("id") == headToken:
                    headToken = token
                    break
            unmergeEPINeg = None
            if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
                # headToken must be resolved to a token element before its text is read
                unmergeEPINeg = headToken.get("text")
            if "trigex" in example[3] and example[3]["trigex"] == "bb":
                extensionRequested = True
            prediction = predictionsByExample[example[0]]
            predictionString = self.getPredictionStrengthString(
                prediction, classSet, classIds)
            for eType in self.getElementTypes(
                    prediction,
                    classSet,
                    classIds,
                    unmergeEPINegText=unmergeEPINeg):  # split merged classes
                entityElement = ET.Element("entity")
                entityElement.set("isName", "False")
                entityElement.set("charOffset", headToken.get("charOffset"))
                entityElement.set("headOffset", headToken.get("charOffset"))
                entityElement.set("text", headToken.get("text"))
                entityElement.set("id",
                                  sentenceId + ".e" + str(newEntityIdCount))
                entityElement.set("type", eType)
                entityElement.set("predictions", predictionString)
                #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
                if self.insertWeights:  # in other words, use gold types
                    headOffset = headToken.get("charOffset")
                    if goldEntityByHeadOffset.has_key(headOffset):
                        for entity in goldEntityByHeadOffset[headOffset]:
                            entity.set("predictions",
                                       entityElement.get("predictions"))
                if goldEntityTypeByHeadOffset.has_key(
                        headToken.get("charOffset")):
                    entityElement.set(
                        "goldType", goldEntityTypeByHeadOffset[headToken.get(
                            "charOffset")])
                if "goldIds" in example[
                        3]:  # The entities for which this example was built
                    entityElement.set("goldIds", example[3]["goldIds"])
                if (entityElement.get("type") != "neg"
                        and not goldEntityByHeadOffset.has_key(
                            entityElement.get("headOffset"))
                    ) or not self.insertWeights:
                    newEntityIdCount += 1
                    sentenceElement.append(entityElement)
                elif entityElement.get("type") == "neg":
                    pass
                    #newEntityIdCount += 1
                    #sentenceElement.append(entityElement)

        # if only adding weights, re-attach interactions and gold entities
        if self.insertWeights:
            for entity in nonNameEntities:
                sentenceElement.append(entity)
            for interaction in interactions:
                sentenceElement.append(interaction)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)

        # Extend bacteria triggers
        if extensionRequested:
            InteractionXML.ExtendTriggers.extend(sentenceElement,
                                                 entityTypes=["Bacterium"])
Example #12
    def writeXMLSentence(self,
                         examples,
                         predictionsByExample,
                         sentenceObject,
                         classSet,
                         classIds,
                         goldSentence=None):
        sentenceElement = sentenceObject.sentence
        self.sentenceId = sentenceElement.get("id")
        self.assertSameSentence(examples, self.sentenceId)
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)

        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement,
                                           ["pair", "interaction"])
        # remove entities
        entities = self.removeNonNameEntities(sentenceElement)

        # filter interactions
        interactionsToKeep = []
        for interaction in interactions:
            if interaction.get("type") != "neg":
                interactionsToKeep.append(interaction)
        interactions = interactionsToKeep

        # early out
        cutoff = 100
        if len(interactions) == 0 or len(interactions) > cutoff:
            # re-attach the analyses-element
            if sentenceAnalysesElement != None:
                sentenceElement.append(sentenceAnalysesElement)
            if len(interactions) > cutoff:
                print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get(
                    "id"
                ), "has more than", cutoff, "interactions, removing all."
            return

        interactionsByEntity = {}
        interactionsById = {}
        for entity in entities:
            interactionsByEntity[entity.get("id")] = []
        for interaction in interactions:
            e1Id = interaction.get("e1")
            if not interactionsByEntity.has_key(e1Id):
                interactionsByEntity[e1Id] = []
            interactionsByEntity[e1Id].append(interaction)
            interactionsById[interaction.get("id")] = interaction

        # NOTE! Following won't work for pairs
        self.entityCount = IDUtils.getNextFreeId(
            sentenceElement.findall("entity"))
        self.interactionCount = IDUtils.getNextFreeId(
            sentenceElement.findall("interaction"))
        self.newEntities = []
        self.newInteractions = []

        # Mapping for connecting the events
        self.entitiesByHeadByType = {}
        #self.tokenByOffset = {}
        #for token in sentenceObject.tokens:
        #    self.tokenByOffset[token.get("charOffset")] = token
        #    self.entityByHeadByType[token.get("charOffset")] = {}
        for entity in sentenceObject.entities:
            # by offset
            offset = entity.get("headOffset")
            if not self.entitiesByHeadByType.has_key(offset):
                self.entitiesByHeadByType[offset] = {}
            # by type
            eType = entity.get("type")
            if entity.get("isName") != "True":
                self.entitiesByHeadByType[offset][eType] = []
            else:  # add names to structure
                if not self.entitiesByHeadByType[offset].has_key(eType):
                    self.entitiesByHeadByType[offset][eType] = []
                self.entitiesByHeadByType[offset][eType].append(entity)

        entityKeys = sentenceObject.entitiesById.keys()
        exampleByEntityId = {}
        for example in examples:
            #if predictionsByExample[example[0]][0] == 1: # negative
            #    continue
            eId = example[3]["e"]
            assert eId in entityKeys
            if not exampleByEntityId.has_key(eId):
                exampleByEntityId[eId] = []
            exampleByEntityId[eId].append(example)

        # This doesn't work, it was an attempt to include
        # only the positive example with the highest prediction strength
#        for key in sorted(exampleByEntityId.keys()):
#            eType = sentenceObject.entitiesById[key].get("type")
#            eExamples = exampleByEntityId[key]
#            if eType == "Binding" and len(eExamples) > 1:
#                maxArgs = -1
#                maxStr = -999999999
#                for example in eExamples:
#                    if predictionsByExample[example[0]][0] == 1:
#                        continue
#                    numArgs = example[3]["i"].count(",") + 1
#                    if numArgs > maxArgs:
#                        maxArgs = numArgs
#                    predClass = predictionsByExample[example[0]][0]
#                    predictionStrength = predictionsByExample[example[0]][predClass]
#                    if predictionStrength > maxStr:
#                        maxStr = predictionStrength
#                #print maxArgs, len(eExamples)
#                for example in eExamples:
#                    if predictionsByExample[example[0]][0] == 1:
#                        continue
#                    predClass = predictionsByExample[example[0]][0]
#                    predictionStrength = predictionsByExample[example[0]][predClass]
#                    if predictionStrength != maxStr:
#                        examples.remove(example)
#                    #if example[3]["i"].count(",") + 1 < maxArgs:
#                    #    examples.remove(example)

        #self.newEntitiesById = {}
        #self.outEdgesByEntity = {}

        # Gather arguments for the simple, one-argument events
        argumentsByExample = {}
        positiveExamples = []
        exampleIdCount = 0
        for entity in entities:
            # If no example, case is unambiguous
            if entity.get("id") not in exampleByEntityId:
                simpleEventInteractions = interactionsByEntity[entity.get(
                    "id")]
                numCauses = 0
                numThemes = 0
                for interaction in simpleEventInteractions[:]:
                    if self.isIntersentence(interaction):
                        print "Warning, intersentence interaction for", entity.get(
                            "id"), entity.get("type")
                        simpleEventInteractions.remove(interaction)
                        continue
                    if interaction.get("type") == "neg":
                        simpleEventInteractions.remove(interaction)
                        continue
                    iType = interaction.get("type")
                    if iType == "Cause":
                        numCauses += 1
                    elif iType == "Theme":
                        numThemes += 1
                eType = entity.get("type")
                assert numThemes == 0 or (numThemes != 0 and numCauses
                                          == 0) or (numThemes > 1
                                                    and eType != "Binding"), (
                                                        numThemes,
                                                        numCauses, eType,
                                                        entity.get("id"), [
                                                            x[0]
                                                            for x in examples
                                                        ], entityKeys)
                #assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType == "Binding"), (numThemes,numCauses,eType,entity.get("id"))
                for interaction in simpleEventInteractions:
                    self.counts["simple-" + eType + "-" +
                                interaction.get("type")] += 1
                    exampleId = "simple." + str(exampleIdCount)
                    exampleIdCount += 1
                    positiveExamples.append([exampleId, None, None, None])
                    argumentsByExample[exampleId] = [interaction]
                    #self.addEvent([interaction], sentenceObject, "simple")

        # Gather arguments for predicted, unmerged events
        for example in examples:
            #print predictionsByExample[example[0]]
            if predictionsByExample[example[0]][0] == 1:  # negative
                continue
            positiveExamples.append(example)
            arguments = []
            for iId in example[3]["i"].split(","):
                if iId == "":  # processes can have 0 arguments
                    assert "etype" in example[3], example[3]
                    assert example[3]["etype"] == "Process", example[3]
                    break
                arg = interactionsById[iId]
                if self.isIntersentence(arg):
                    continue
                assert arg.get("type") != "neg"
                arguments.append(arg)
            argumentsByExample[example[0]] = arguments

        # Loop until all positive examples are added. This process
        # assumes that the events (mostly) form a directed acyclic
        # graph, which can be written by "growing" the structure from
        # the "leaf" events, and consecutively adding levels of
        # nesting events.
        examplesLeft = len(positiveExamples)
        exampleAdded = {}
        for example in positiveExamples:
            exampleAdded[example[0]] = False
        forceAdd = False
        forcedCount = 0
        while examplesLeft > 0:
            if len(self.newEntities) > 100:
                print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get(
                    "id"
                ), "has generated more than", cutoff, "events, skipping the rest."
                break
            examplesAddedThisRound = 0
            # For each round, loop through the potentially remaining examples
            for example in positiveExamples:
                if len(self.newEntities) > 100:
                    break
                if exampleAdded[
                        example[0]]:  # This event has already been inserted
                    continue
                arguments = argumentsByExample[example[0]]
                # An event can be added if all of its argument events have already
                # been added. Addition is forced if lack of argument events blocks
                # the process.
                if forceAdd or self.argumentEntitiesExist(
                        arguments, sentenceObject):
                    umType = "complex"  # mark the root entity in the output xml
                    predictionStrength = None
                    if example[0].find("simple") != -1:
                        umType = "simple"
                    else:
                        # Prediction strength is only available for classified argument groups
                        predictionStrength = self.getPredictionStrength(
                            example, predictionsByExample, classSet, classIds)
                    #print example
                    if umType != "simple" and "eType" in example[
                            3] and example[3]["etype"] == "Process" and len(
                                arguments) == 0:
                        origProcess = sentenceObject.entitiesById[example[3]
                                                                  ["e"]]
                        # Put back the original entity
                        newProcess = self.addEntity(origProcess)
                        newProcess.set("umType", umType)
                        if predictionStrength != None:
                            newProcess.set("umStrength",
                                           str(predictionStrength))
                    else:  # example has arguments
                        self.addEvent(arguments, sentenceObject, umType,
                                      forceAdd, predictionStrength)
                    exampleAdded[example[0]] = True
                    examplesLeft -= 1
                    examplesAddedThisRound += 1
                    forceAdd = False
            if examplesLeft > 0 and examplesAddedThisRound == 0:
                # If there are examples left, but nothing was added, this
                # means that some nested events are missing. Theoretically
                # this could also be because two events are referring to
                # each other, preventing each other's insertion. In any
                # case this is solved by simply forcing the addition of
                # the first non-inserted event, by creating 0-argument
                # entities for its argument events.
                forcedCount += 1
                #print "Warning, forcing event addition"
                forceAdd = True

        # Attach the new elements
        for element in self.newEntities + self.newInteractions:
            sentenceElement.append(element)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
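
The loop above writes events in dependency order: an event is emitted only once every event it takes as an argument already exists, and a forced insertion breaks the occasional deadlock caused by missing or mutually referring arguments. The sketch below shows only that ordering idea on plain dictionaries; the event representation and the names `events` and `insertion_order` are illustrative assumptions, not part of the class above.

# Minimal sketch of the "grow from the leaves" insertion order used above.
# The event representation is a hypothetical stand-in for the real examples:
# a dict mapping an event id to the ids of its argument events.
def insertion_order(events):
    added = set()
    order = []
    remaining = set(events)
    while remaining:
        progressed = False
        for eventId in sorted(remaining):
            # An event can be added once all of its argument events exist.
            if all(argId in added for argId in events[eventId]):
                order.append(eventId)
                added.add(eventId)
                remaining.discard(eventId)
                progressed = True
        if not progressed:
            # A cycle or a missing argument blocks progress: force-add one
            # event, mirroring the forceAdd fallback in the method above.
            forced = sorted(remaining)[0]
            order.append(forced)
            added.add(forced)
            remaining.discard(forced)
    return order

# Example: E3 nests the leaf events E1 and E2, so it is inserted last.
print(insertion_order({"E1": [], "E2": [], "E3": ["E1", "E2"]}))

In the real method, "adding" an event also creates the corresponding entity and interaction elements, and the forced path creates zero-argument placeholder entities for the missing arguments.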
Example #13
0
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False):
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()
    
    counts = defaultdict(int)
    for document in corpusRoot.findall("document"):
        sentDict = None
        pmid = document.get("pmid")
        isPMC = False
        for sentence in document.findall("sentence"):
            counts["sentences"] += 1
            sentenceId = str(sentence.get("id")) + "/" + str(sentence.get("origId"))
            if verbose: print "Processing", sentenceId
            if sentDict == None:
                if sentence.get("origId") != None:
                    assert pmid == None
                    sentDict = loadEventXML(eventDir + "/" + sentence.get("origId").split(".")[0] + ".xml", verbose=verbose)
                else:
                    #pmid = sentence.get("pmid")
                    assert pmid != None
                    if pmid.startswith("PMC"):
                        isPMC = True
                        sentDict = {}
                    else:
                        assert pmid.startswith("PMID")
                        sentDict = loadEventXML(eventDir + "/" + pmid.split("-", 1)[-1] + ".xml", verbose=verbose)
            interactionXMLText = sentence.get("text")
            if not sentDict.has_key(interactionXMLText):
                counts["missing-sentences"] += 1
                if isPMC: counts["missing-sentences-PMC"] += 1
                if verbose: print "Missing sentence:", pmid, (sentenceId, sentDict, sentence.get("text"))
            else:
                sentenceAnalyses = sentence.find("sentenceanalyses")
                if sentenceAnalyses != None:
                    sentence.remove(sentenceAnalyses)
                entityIdCount = IDUtils.getNextFreeId(sentence.findall("entity"))
                events = sentDict[interactionXMLText]
                events.sort()
                for event in events:
                    if not keepEvent(event[2]):
                        counts["filtered-triggers"] += 1
                        continue
                    trigger = ET.Element("entity")
                    trigger.set("isName", "False")
                    trigger.set("charOffset", str(event[0]) + "-" + str(event[1]))
                    trigger.set("type", str(event[2]))
                    trigger.set("text", str(event[3]))
                    trigger.set("source", "GENIA_event_annotation_0.9")
                    trigger.set("id", sentence.get("id") + ".e" + str(entityIdCount))
                    entityIdCount += 1
                    counts["added-triggers"] += 1
                    sentence.append(trigger)
                if sentenceAnalyses != None:
                    sentence.append(sentenceAnalyses)
    
    FindHeads.findHeads(corpusTree, parse, removeExisting=False)
    removeDuplicates(corpusRoot)
    print counts
    
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
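
`IDUtils.getNextFreeId` is a project-internal helper that is not shown on this page. Judging from how it is used here and in the other examples (it receives the existing `entity` or `interaction` elements and its result becomes the numeric suffix of the next generated id, e.g. `<sentence id>.e<N>`), a plausible implementation is sketched below; this is an assumption about its behaviour, not the original code.

# Hedged sketch of what IDUtils.getNextFreeId presumably does: read the trailing
# number of each existing element id (e.g. "d0.s1.e7" -> 7) and return the next
# unused value (highest + 1), so newly generated ids never collide.
import re

def getNextFreeId(elements):
    highest = -1
    for element in elements:
        elementId = element.get("id")
        if elementId is None:
            continue
        match = re.search(r"(\d+)$", elementId)  # trailing number of the id
        if match is not None:
            highest = max(highest, int(match.group(1)))
    return highest + 1

With this behaviour, the trigger loop above can keep appending ".e" + str(entityIdCount) and incrementing the counter without clashing with entities already present in the sentence.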
Example #14
0
def _writeExamplesToInteractionXML(examples, predictionsByExample, sentenceObject, classSet, classIds, xType):
    currentSetMajorId = None
    for example in examples:
        majorId, minorId = example[0].rsplit(".x", 1)
        if currentSetMajorId == None: 
            currentSetMajorId = majorId
        else: 
            assert currentSetMajorId == majorId, str(currentSetMajorId) + "/" + str(majorId)
    
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    # Dummy structure for backwards compatibility
    examplesBySentence = {}
    if len(examples) > 0:
        examplesBySentence[sentenceId] = examples
    # detach analyses
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement != None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    pairElements = sentenceElement.findall("pair")
    if pairElements != None:
        for pairElement in pairElements:
            sentenceElement.remove(pairElement)
    interactionElements = sentenceElement.findall("interaction")
    if interactionElements != None:
        for interactionElement in interactionElements:
            sentenceElement.remove(interactionElement)
    # remove entities
    if xType == "token":
        entityElements = sentenceElement.findall("entity")
        entityCount = 0
        if entityElements != None:
            entityCount = len(entityElements) # get the count _before_ removing entities
            for entityElement in entityElements:
                if entityElement.get("isName") == "False": # interaction word
                    sentenceElement.remove(entityElement)
        # add new pairs
        entityElements = sentenceElement.findall("entity")
        newEntityIdCount = IDUtils.getNextFreeId(entityElements)
        if examplesBySentence.has_key(sentenceId):
            for example in examplesBySentence[sentenceId]:
                prediction = predictionsByExample[example[0]]
                entityElement = ET.Element("entity")
                entityElement.attrib["isName"] = "False"
                headToken = example[3]["t"]
                for token in sentenceObject.tokens:
                    if token.get("id") == headToken:
                        headToken = token
                        break
                entityElement.attrib["charOffset"] = headToken.get("charOffset") 
                entityElement.attrib["headOffset"] = headToken.get("charOffset")
                entityElement.attrib["text"] = headToken.get("text")
                entityElement.attrib["id"] = sentenceId + ".e" + str(newEntityIdCount)
                newEntityIdCount += 1
                if classSet == None: # binary classification
                    if prediction[0] > 0:
                        entityElement.attrib["type"] = str(True)
                    else:
                        entityElement.attrib["type"] = str(False)
                else:
                    entityElement.attrib["type"] = classSet.getName(prediction[0])
                    classWeights = prediction[1:]
                    predictionString = ""
                    for i in range(len(classWeights)):
                        if predictionString != "":
                            predictionString += ","
                        predictionString += classSet.getName(classIds[i]) + ":" + str(classWeights[i])
                    entityElement.attrib["predictions"] = predictionString
                #if entityElement.attrib["type"] != "neg":
                sentenceElement.append(entityElement)
                entityCount += 1
    elif xType == "edge":
        pairCount = 0
        if examplesBySentence.has_key(sentenceId):
            for example in examplesBySentence[sentenceId]:
                prediction = predictionsByExample[example[0]]
                pairElement = ET.Element("interaction")
                #pairElement.attrib["origId"] = origId
                #pairElement.attrib["type"] = example[3]["categoryName"]
                pairElement.attrib["directed"] = "Unknown"
                pairElement.attrib["e1"] = example[3]["e1"] #.attrib["id"]
                pairElement.attrib["e2"] = example[3]["e2"] #.attrib["id"]
                pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                if classSet == None: # binary classification
                    if prediction[0] > 0:
                        pairElement.attrib["type"] = str(True)
                    else:
                        pairElement.attrib["type"] = str(False)
                else:
                    pairElement.attrib["type"] = classSet.getName(prediction[0])
                    classWeights = prediction[1:]
                    predictionString = ""
                    for i in range(len(classWeights)):
                        if predictionString != "":
                            predictionString += ","
                        predictionString += classSet.getName(classIds[i]) + ":" + str(classWeights[i])
                    pairElement.attrib["predictions"] = predictionString
                sentenceElement.append(pairElement)
                pairCount += 1
    elif xType == "trigger-event":
        eventsByToken = {}
        existingEntities = set()
        entityElements = sentenceElement.findall("entity")
        entityCount = 0
        pairCount = 0
        if entityElements != None:
            entityCount = len(entityElements) # get the count _before_ removing entities
            for entityElement in entityElements:
                if entityElement.get("isName") == "False": # interaction word
                    sentenceElement.remove(entityElement)
                else:
                    existingEntities.add(entityElement.get("id"))
        # add new pairs
        entityElements = sentenceElement.findall("entity")
        newEntityIdCount = IDUtils.getNextFreeId(entityElements)
        if examplesBySentence.has_key(sentenceId):
            eventIdByExample = {}
            newEntities = []
            for example in examplesBySentence[sentenceId]:
                prediction = predictionsByExample[example[0]]
                if prediction[0] == 1:
                    continue
                entityElement = ET.Element("entity")
                newEntities.append(entityElement)
                entityElement.attrib["isName"] = "False"
                headToken = example[3]["et"]
                for token in sentenceObject.tokens:
                    if token.get("id") == headToken:
                        headToken = token
                        break
                entityElement.attrib["charOffset"] = headToken.get("charOffset") 
                entityElement.attrib["headOffset"] = headToken.get("charOffset")
                entityElement.attrib["text"] = headToken.get("text")
                entityElement.attrib["id"] = sentenceId + ".e" + str(newEntityIdCount)
                newEntityIdCount += 1
                eventIdByExample[example[0]] = entityElement.get("id")
                
                #if not eventByOrigId.has_key(example[3]["e"]):
                #    eventByOrigId[example[3]["e"]] = []
                #eventByOrigId[example[3]["e"]].append(entityElement.attrib["id"])
                #example[3]["e"] = entityElement.attrib["id"]
                
                
                if not eventsByToken.has_key(example[3]["et"]):
                    eventsByToken[example[3]["et"]] = []
                eventsByToken[example[3]["et"]].append(entityElement.get("id"))

                entityElement.attrib["type"] = example[3]["type"]
                classWeights = prediction[1:]
                predictionString = ""
                for i in range(len(classWeights)):
                    if predictionString != "":
                        predictionString += ","
                    predictionString += classSet.getName(classIds[i]) + ":" + str(classWeights[i])
                entityElement.attrib["predictions"] = predictionString
                #if entityElement.attrib["type"] != "neg":
                sentenceElement.append(entityElement)
                entityCount += 1
                
            for example in examplesBySentence[sentenceId]:
                prediction = predictionsByExample[example[0]]
                if prediction[0] == 1:
                    continue
                # add theme edge
                if example[3].has_key("t"):
                    pairElement = ET.Element("interaction")
                    pairElement.attrib["directed"] = "Unknown"
                    pairElement.attrib["e1"] = eventIdByExample[example[0]]
                    if eventsByToken.has_key(example[3]["tt"]):
                        pairElement.attrib["e2"] = eventsByToken[example[3]["tt"]][0]
                    else:
                        if example[3]["t"] in existingEntities:
                            pairElement.attrib["e2"] = example[3]["t"] #.attrib["id"]
                    pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                    pairElement.attrib["type"] = "Theme"
                    if pairElement.get("e2") != None:
                        sentenceElement.append(pairElement)
                        pairCount += 1
                
                # add cause edge
                if example[3].has_key("c"):
                    pairElement = ET.Element("interaction")
                    pairElement.attrib["directed"] = "Unknown"
                    pairElement.attrib["e1"] = eventIdByExample[example[0]]
                    if eventsByToken.has_key(example[3]["ct"]):
                        pairElement.attrib["e2"] = eventsByToken[example[3]["ct"]][0]
                    else:
                        if example[3]["c"] in existingEntities:
                            pairElement.attrib["e2"] = example[3]["c"] #.attrib["id"]
                    pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                    pairElement.attrib["type"] = "Cause"
                    if pairElement.get("e2") != None:
                        sentenceElement.append(pairElement)
                        pairCount += 1
#                    classWeights = prediction[1:]
#                    predictionString = ""
#                    for i in range(len(classWeights)):
#                        if predictionString != "":
#                            predictionString += ","
#                        predictionString += classSet.getName(classIds[i]) + ":" + str(classWeights[i])
#                    pairElement.attrib["predictions"] = predictionString
    elif xType == "event":
        if True:
            process(sentenceObject, examplesBySentence, classSet, classIds, predictionsByExample)
        else:
            eventsByToken = {}
            existingEntities = set()
            entityElements = sentenceElement.findall("entity")
            entityCount = 0
            pairCount = 0
            if entityElements != None:
                entityCount = len(entityElements) # get the count _before_ removing entities
                for entityElement in entityElements:
                    if entityElement.get("isName") == "False": # interaction word
                        sentenceElement.remove(entityElement)
                    else:
                        existingEntities.add(entityElement.get("id"))
            # add new pairs
            entityElements = sentenceElement.findall("entity")
            newEntityIdCount = IDUtils.getNextFreeId(entityElements)
            if examplesBySentence.has_key(sentenceId):
                # split merged examples
                for example in examplesBySentence[sentenceId][:]:
                    prediction = predictionsByExample[example[0]]
                    if classSet.getName(prediction[0]).find("---") != -1:
                        nameSplits = classSet.getName(prediction[0]).split("---")
                        prediction[0] = classSet.getId(nameSplits[0], False)
                        count = 1
                        for nameSplit in nameSplits[1:]:
                            newExample = example[:]
                            newExample[0] += ".dupl" + str(count)
                            examplesBySentence[sentenceId].append(newExample)
                            newPrediction = prediction[:]
                            newPrediction[0] = classSet.getId(nameSplit, False)
                            predictionsByExample[newExample[0]] = newPrediction
                            count += 1
                
                # create the trigger entities and Theme/Cause edges for the remaining examples
                eventIdByExample = {}
                newEntities = []
                for example in examplesBySentence[sentenceId]:
                    prediction = predictionsByExample[example[0]]
                    if prediction[0] == 1:
                        continue
                    entityElement = ET.Element("entity")
                    newEntities.append(entityElement)
                    entityElement.attrib["isName"] = "False"
                    headToken = example[3]["et"]
                    for token in sentenceObject.tokens:
                        if token.get("id") == headToken:
                            headToken = token
                            break
                    entityElement.attrib["charOffset"] = headToken.get("charOffset") 
                    entityElement.attrib["headOffset"] = headToken.get("charOffset")
                    entityElement.attrib["text"] = headToken.get("text")
                    entityElement.attrib["id"] = sentenceId + ".e" + str(newEntityIdCount)
                    newEntityIdCount += 1
                    eventIdByExample[example[0]] = entityElement.get("id")
                    
                    #if not eventByOrigId.has_key(example[3]["e"]):
                    #    eventByOrigId[example[3]["e"]] = []
                    #eventByOrigId[example[3]["e"]].append(entityElement.attrib["id"])
                    #example[3]["e"] = entityElement.attrib["id"]
                    
                    
                    if not eventsByToken.has_key(example[3]["et"]):
                        eventsByToken[example[3]["et"]] = []
                    eventsByToken[example[3]["et"]].append(entityElement.get("id"))

                    entityElement.attrib["type"] = classSet.getName(prediction[0]) #example[3]["type"]
                    classWeights = prediction[1:]
                    predictionString = ""
                    for i in range(len(classWeights)):
                        if predictionString != "":
                            predictionString += ","
                        predictionString += classSet.getName(classIds[i]) + ":" + str(classWeights[i])
                    entityElement.attrib["predictions"] = predictionString
                    #if entityElement.attrib["type"] != "neg":
                    sentenceElement.append(entityElement)
                    entityCount += 1
                    
                for example in examplesBySentence[sentenceId]:
                    prediction = predictionsByExample[example[0]]
                    if prediction[0] == 1:
                        continue
                    # add theme edge
                    if example[3].has_key("tt"):
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = eventIdByExample[example[0]]
                        if eventsByToken.has_key(example[3]["tt"]):
                            pairElement.attrib["e2"] = eventsByToken[example[3]["tt"]][0]
                        elif example[3].has_key("t") and example[3]["t"] in existingEntities:
                            pairElement.attrib["e2"] = example[3]["t"] #.attrib["id"]
                        pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                        pairElement.attrib["type"] = "Theme"
                        if pairElement.get("e2") != None:
                            sentenceElement.append(pairElement)
                            pairCount += 1
                    
                    # add cause edge
                    if example[3].has_key("ct"):
                        pairElement = ET.Element("interaction")
                        pairElement.attrib["directed"] = "Unknown"
                        pairElement.attrib["e1"] = eventIdByExample[example[0]]
                        if eventsByToken.has_key(example[3]["ct"]):
                            pairElement.attrib["e2"] = eventsByToken[example[3]["ct"]][0]
                        elif example[3].has_key("c") and example[3]["c"] in existingEntities:
                            pairElement.attrib["e2"] = example[3]["c"] #.attrib["id"]
                        pairElement.attrib["id"] = sentenceId + ".i" + str(pairCount)
                        pairElement.attrib["type"] = "Cause"
                        if pairElement.get("e2") != None:
                            sentenceElement.append(pairElement)
                            pairCount += 1
    elif xType == None:
        pass
    else:
        sys.exit("Error, unknown xtype")
    # re-attach the analyses-element
    if sentenceAnalysesElement != None:
        sentenceElement.append(sentenceAnalysesElement)
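
The comma-separated "predictions" attribute (className:weight pairs, one per class) is assembled inline four times in the function above, once for each element type. A small helper along the lines of the sketch below would capture that repeated pattern; the name `buildPredictionString` is an illustrative suggestion, not part of the original module.

# Sketch of a helper for the repeated "predictions" attribute construction.
# prediction is [predictedClassId, weight1, weight2, ...] as in the code above,
# and classIds lists the class id corresponding to each weight position.
def buildPredictionString(prediction, classSet, classIds):
    classWeights = prediction[1:]
    parts = []
    for i in range(len(classWeights)):
        parts.append(classSet.getName(classIds[i]) + ":" + str(classWeights[i]))
    return ",".join(parts)

# e.g. entityElement.attrib["predictions"] = buildPredictionString(prediction, classSet, classIds)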