示例#1
0
    def writeXML(self,
                 examples,
                 predictions,
                 corpus,
                 outputFile,
                 classSet=None,
                 parse=None,
                 tokenization=None,
                 goldCorpus=None,
                 exampleStyle=None,
                 structureAnalyzer=None):
        """
        Writes task 3 examples to interaction XML. Assumes task 3 classification
        is done with SVMMulticlass Classifier, used for two classes.
        """
        print >> sys.stderr, "Adding task 3 to Interaction XML"
        examples, predictions = self.loadExamples(examples, predictions)

        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        classIds = None
        if classSet != None:
            classIds = classSet.getIds()

        corpusTree = ETUtils.ETFromObj(corpus)
        corpusRoot = corpusTree.getroot()

        # Determine subtask
        task3Type = None
        for example in examples:
            assert example[3].has_key("t3type")
            task3Type = example[3]["t3type"]
            break
        if task3Type == None:
            if outputFile != None:
                print >> sys.stderr, "Writing corpus to", outputFile
                ETUtils.write(corpusRoot, outputFile)
            return corpusTree
        assert task3Type in ["multiclass", "speculation", "negation"]

        # Remove the task 3 subtask information if it already exists
        for entity in corpusRoot.getiterator("entity"):
            if task3Type == "multiclass":
                entity.set("speculation", "False")
                entity.set("negation", "False")
            elif task3Type == "speculation":
                entity.set("speculation", "False")
            else:  # task3Type == "negation"
                entity.set("negation", "False")

        specMap = {}
        negMap = {}
        for example, prediction in itertools.izip(examples, predictions):
            assert example[3]["xtype"] == "task3"
            if example[3]["t3type"] == "multiclass":
                if isinstance(prediction, dict):
                    encoded = prediction["prediction"]
                    predictedModifiers = [
                        classSet.getName(i) for i in range(len(encoded))
                        if encoded[i] == 1
                    ]
                else:
                    predictedClassName = classSet.getName(prediction[0])
                    predictedModifiers = ""
                    if predictedClassName != "neg":
                        predictedModifiers = predictedClassName.split("---")
                if "negation" in predictedModifiers:
                    assert not negMap.has_key(example[3]["entity"])
                    negMap[example[3]["entity"]] = (True, prediction)
                if "speculation" in predictedModifiers:
                    assert not specMap.has_key(example[3]["entity"])
                    specMap[example[3]["entity"]] = (True, prediction)
            else:
                if example[3]["t3type"] == "speculation":
                    map = specMap
                else:
                    map = negMap
                if prediction[0] != 1:
                    assert not map.has_key(example[3]["entity"])
                    map[example[3]["entity"]] = (True, prediction)
                else:
                    assert not map.has_key(example[3]["entity"])
                    map[example[3]["entity"]] = (False, prediction)

        for entity in corpusRoot.getiterator("entity"):
            eId = entity.get("id")
            if task3Type == "multiclass":
                if specMap.has_key(eId):
                    entity.set("speculation", str(specMap[eId][0]))
                    entity.set(
                        "modConf",
                        self.getPredictionStrengthString(
                            specMap[eId][1], classSet, classIds))
                if negMap.has_key(eId):
                    entity.set("negation", str(negMap[eId][0]))
                    entity.set(
                        "modConf",
                        self.getPredictionStrengthString(
                            negMap[eId][1], classSet, classIds))
            else:
                if task3Type == "speculation":
                    if specMap.has_key(eId):
                        entity.set("speculation", str(specMap[eId][0]))
                        entity.set(
                            "specConf",
                            self.getPredictionStrengthString(
                                specMap[eId][1], classSet, classIds, [""]))
                elif task3Type == "negation":
                    if negMap.has_key(eId):
                        entity.set("negation", str(negMap[eId][0]))
                        entity.set(
                            "negConf",
                            self.getPredictionStrengthString(
                                negMap[eId][1], classSet, classIds,
                                ["", "speculation"]))

        # Write corpus
        if outputFile != None:
            print >> sys.stderr, "Writing corpus to", outputFile
            ETUtils.write(corpusRoot, outputFile)
        return corpusTree
示例#2
0
    def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None, exampleStyle=None):
        """
        Writes task 3 examples to interaction XML. Assumes task 3 classification
        is done with SVMMulticlass Classifier, used for two classes.
        """
        print >> sys.stderr, "Adding task 3 to Interaction XML"
        examples, predictions = self.loadExamples(examples, predictions)
        
        if type(classSet) == types.StringType: # class names are in file
            classSet = IdSet(filename=classSet)
        classIds = None
        if classSet != None:
            classIds = classSet.getIds()

        corpusTree = ETUtils.ETFromObj(corpus)
        corpusRoot = corpusTree.getroot()
        
        # Determine subtask
        task3Type = None
        for example in examples:
            assert example[3].has_key("t3type")
            task3Type = example[3]["t3type"]
            break        
        if task3Type == None:
            if outputFile != None:
                print >> sys.stderr, "Writing corpus to", outputFile
                ETUtils.write(corpusRoot, outputFile)
            return corpusTree
        assert task3Type in ["multiclass", "speculation", "negation"]
        
        # Remove the task 3 subtask information if it already exists
        for entity in corpusRoot.getiterator("entity"):
            if task3Type == "multiclass":
                entity.set("speculation", "False")
                entity.set("negation", "False")
            elif task3Type == "speculation":
                entity.set("speculation", "False")
            else: # task3Type == "negation"
                entity.set("negation", "False")
        
        specMap = {}
        negMap = {}
        for example, prediction in itertools.izip(examples, predictions):
            assert example[3]["xtype"] == "task3"
            if example[3]["t3type"] == "multiclass":
                predictedClassName = classSet.getName(prediction[0])
                if predictedClassName != "neg":
                    predictedModifiers = predictedClassName.split("---")
                    if "negation" in predictedModifiers:
                        assert not negMap.has_key(example[3]["entity"])
                        negMap[example[3]["entity"]] = (True, prediction)
                    if "speculation" in predictedModifiers:
                        assert not specMap.has_key(example[3]["entity"])
                        specMap[example[3]["entity"]] = (True, prediction)
            else:
                if example[3]["t3type"] == "speculation":
                    map = specMap
                else:
                    map = negMap
                if prediction[0] != 1:
                    assert not map.has_key(example[3]["entity"])
                    map[example[3]["entity"]] = (True, prediction)
                else:
                    assert not map.has_key(example[3]["entity"])
                    map[example[3]["entity"]] = (False, prediction)
        
        for entity in corpusRoot.getiterator("entity"):
            eId = entity.get("id")
            if task3Type == "multiclass":
                if specMap.has_key(eId):
                    entity.set("speculation", str(specMap[eId][0]))
                    entity.set("modPred", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds))
                if negMap.has_key(eId):
                    entity.set("negation", str(negMap[eId][0]))
                    entity.set("modPred", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds))
            else:
                if task3Type == "speculation":
                    if specMap.has_key(eId):
                        entity.set("speculation", str(specMap[eId][0]))
                        entity.set("specPred", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds, [""]))
                elif task3Type == "negation":
                    if negMap.has_key(eId):
                        entity.set("negation", str(negMap[eId][0]))
                        entity.set("negPred", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds, ["","speculation"]))
        
        # Write corpus
        if outputFile != None:
            print >> sys.stderr, "Writing corpus to", outputFile
            ETUtils.write(corpusRoot, outputFile)
        return corpusTree
示例#3
0
 def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None):
     #print >> sys.stderr, "Writing output to Interaction XML"
     corpus = self.loadCorpus(corpus, parse, tokenization)
     if goldCorpus != None:
         goldCorpus = self.loadCorpus(corpus, parse, tokenization)
     examples, predictions = self.loadExamples(examples, predictions)
     
     if type(classSet) == types.StringType: # class names are in file
         classSet = IdSet(filename=classSet)
     classIds = None
     if classSet != None:
         classIds = classSet.getIds()
         
     #counter = ProgressCounter(len(corpus.sentences), "Write Examples")
             
     exampleQueue = [] # One sentence's examples
     predictionsByExample = {}
     currentMajorId = None
     prevMajorIds = set()
     processedSentenceIds = set()
     xType = None
     
     count = 0
     for example in examples:
         count += 1
     assert count > 0
     progress = ProgressCounter(count, "Write Examples")
     
     for example, prediction in itertools.izip_longest(examples, predictions):
         assert example != None
         assert prediction != None
         majorId, minorId = example[0].rsplit(".x", 1)
         #if currentMajorId == "GENIA.d114.s9": print "Start"
         if majorId != currentMajorId: # new sentence
             if currentMajorId != None:
                 #if currentMajorId == "GENIA.d114.s9": print "JAA"
                 processedSentenceIds.add(currentMajorId)
                 sentenceObject = corpus.sentencesById[currentMajorId]
                 goldSentence = None
                 if goldCorpus != None:
                     goldSentence = goldCorpus.sentencesById[currentMajorId]
                 self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence) # process queue
                 progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
             exampleQueue = []
             predictionsByExample = {}
             prevMajorIds.add(currentMajorId)
             assert majorId not in prevMajorIds, majorId
             currentMajorId = majorId 
         exampleQueue.append(example) # queue example
         predictionsByExample[example[0]] = prediction
         assert example[3]["xtype"] == self.xType, str(example[3]["xtype"]) + "/" + str(self.xType)
     
     # Process what is still in queue
     if currentMajorId != None:
         processedSentenceIds.add(currentMajorId)
         sentenceObject = corpus.sentencesById[currentMajorId]
         goldSentence = None
         if goldCorpus != None:
             goldSentence = goldCorpus.sentencesById[currentMajorId]
         self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence) # process queue
         progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
         exampleQueue = []
         predictionsByExample = {}
     
     # Process sentences with no examples (e.g. to clear interactions)
     for sentenceId in sorted(corpus.sentencesById.keys()):
         if sentenceId not in processedSentenceIds:
             sentenceObject = corpus.sentencesById[sentenceId]
             goldSentence = None
             if goldCorpus != None:
                 goldSentence = goldCorpus.sentencesById[currentMajorId]
             self.writeXMLSentence([], {}, sentenceObject, classSet, classIds, goldSentence=goldSentence)
     
     # Print statistics
     if len(self.counts) > 0:
         print >> sys.stderr, self.counts
         self.counts = defaultdict(int)
 
     # Write corpus
     if outputFile != None:
         print >> sys.stderr, "Writing corpus to", outputFile
         ETUtils.write(corpus.rootElement, outputFile)
     return corpus.tree
示例#4
0
    def writeXML(self,
                 examples,
                 predictions,
                 corpus,
                 outputFile,
                 classSet=None,
                 parse=None,
                 tokenization=None,
                 goldCorpus=None,
                 exampleStyle=None,
                 structureAnalyzer=None):
        #print >> sys.stderr, "Writing output to Interaction XML"
        corpus = self.loadCorpus(corpus, parse, tokenization)
        if goldCorpus != None:
            goldCorpus = self.loadCorpus(corpus, parse, tokenization)
        examples, predictions = self.loadExamples(examples, predictions)

        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        classIds = None
        if classSet != None:
            classIds = classSet.getIds()

        #counter = ProgressCounter(len(corpus.sentences), "Write Examples")

        exampleQueue = []  # One sentence's examples
        predictionsByExample = {}
        currentMajorId = None
        prevMajorIds = set()
        processedSentenceIds = set()
        xType = None

        count = 0
        for example in examples:
            count += 1
        #assert count > 0
        if count > 0:
            progress = ProgressCounter(count, "Write Examples")
        else:
            predCount = 0
            for prediction in predictions:
                predCount += 1
            assert predCount == 0

        for example, prediction in itertools.izip_longest(
                examples, predictions):
            assert example != None
            assert prediction != None
            majorId, minorId = example[0].rsplit(".x", 1)
            #if currentMajorId == "GENIA.d114.s9": print "Start"
            if majorId != currentMajorId:  # new sentence
                if currentMajorId != None:
                    #if currentMajorId == "GENIA.d114.s9": print "JAA"
                    processedSentenceIds.add(currentMajorId)
                    sentenceObject = corpus.sentencesById[currentMajorId]
                    goldSentence = None
                    if goldCorpus != None:
                        goldSentence = goldCorpus.sentencesById[currentMajorId]
                    self.writeXMLSentence(
                        exampleQueue,
                        predictionsByExample,
                        sentenceObject,
                        classSet,
                        classIds,
                        goldSentence=goldSentence,
                        exampleStyle=exampleStyle,
                        structureAnalyzer=structureAnalyzer)  # process queue
                    progress.update(
                        len(exampleQueue),
                        "Writing examples (" + exampleQueue[-1][0] + "): ")
                exampleQueue = []
                predictionsByExample = {}
                prevMajorIds.add(currentMajorId)
                assert majorId not in prevMajorIds, majorId
                currentMajorId = majorId
            exampleQueue.append(example)  # queue example
            predictionsByExample[example[0]] = prediction
            assert example[3]["xtype"] == self.xType, str(
                example[3]["xtype"]) + "/" + str(self.xType)

        # Process what is still in queue
        if currentMajorId != None:
            processedSentenceIds.add(currentMajorId)
            sentenceObject = corpus.sentencesById[currentMajorId]
            goldSentence = None
            if goldCorpus != None:
                goldSentence = goldCorpus.sentencesById[currentMajorId]
            self.writeXMLSentence(
                exampleQueue,
                predictionsByExample,
                sentenceObject,
                classSet,
                classIds,
                goldSentence=goldSentence,
                exampleStyle=exampleStyle,
                structureAnalyzer=structureAnalyzer)  # process queue
            progress.update(len(exampleQueue),
                            "Writing examples (" + exampleQueue[-1][0] + "): ")
            exampleQueue = []
            predictionsByExample = {}

        # Process sentences with no examples (e.g. to clear interactions)
        for sentenceId in sorted(corpus.sentencesById.keys()):
            if sentenceId not in processedSentenceIds:
                sentenceObject = corpus.sentencesById[sentenceId]
                goldSentence = None
                if goldCorpus != None:
                    goldSentence = goldCorpus.sentencesById[currentMajorId]
                self.writeXMLSentence([], {},
                                      sentenceObject,
                                      classSet,
                                      classIds,
                                      goldSentence=goldSentence,
                                      exampleStyle=exampleStyle,
                                      structureAnalyzer=structureAnalyzer)

        # Print statistics
        if len(self.counts) > 0:
            print >> sys.stderr, self.counts
            self.counts = defaultdict(int)

        # Write corpus
        if outputFile != None:
            print >> sys.stderr, "Writing corpus to", outputFile
            ETUtils.write(corpus.rootElement, outputFile)
        return corpus.tree