Exemplo n.º 1
0
 def fromXML(self, input, parse, tokenization=None):
     """Rebuild ``self.names`` from the name entities of a corpus.

     ``self.names`` is reset to an empty dict before anything is read.

     Args:
         input: Either a string, which is handed to
             ``CorpusElements.loadCorpus`` together with ``parse`` and
             ``tokenization``, or an already loaded corpus object exposing
             ``sentences``.
         parse: Forwarded to ``CorpusElements.loadCorpus``; unused when
             ``input`` is already a corpus object.
         tokenization: Forwarded to ``CorpusElements.loadCorpus``; unused
             when ``input`` is already a corpus object.

     Raises:
         AssertionError: If ``self.getTokens`` returns no tokens for an
             entity flagged as a name.
     """
     # An exact type(...) == str comparison rejects unicode strings and str
     # subclasses, which would then be mistaken for a corpus object.
     # ``basestring`` only exists on Python 2, hence the fallback.
     try:
         stringTypes = basestring
     except NameError:
         stringTypes = str

     self.names = {}
     if isinstance(input, stringTypes):
         corpus = CorpusElements.loadCorpus(input, parse, tokenization)
     else:
         corpus = input

     for sentence in corpus.sentences:
         tokenTuples = self.prepareTokens(sentence.tokens)
         for entity in sentence.entities:
             # Only entities whose "isName" attribute is the string "True"
             # contribute names.
             if entity.get("isName") != "True":
                 continue
             tokens = self.getTokens(entity, tokenTuples)
             assert len(tokens) > 0, "name entity matched no tokens"
             # Register the name twice: as the token sequence and as one
             # token made of all parts concatenated without a separator.
             self.addName(tokens)
             self.addName(["".join(tokens)])
Exemplo n.º 2
0
 def fromXML(self, input, parse, tokenization=None):
     """Rebuild ``self.names`` from the given entities of a corpus.

     ``self.names`` is reset to an empty dict before anything is read.

     Args:
         input: Either a string, which is handed to
             ``CorpusElements.loadCorpus`` together with ``parse`` and
             ``tokenization``, or an already loaded corpus object exposing
             ``sentences``.
         parse: Forwarded to ``CorpusElements.loadCorpus``; unused when
             ``input`` is already a corpus object.
         tokenization: Forwarded to ``CorpusElements.loadCorpus``; unused
             when ``input`` is already a corpus object.

     Raises:
         AssertionError: If ``self.getTokens`` returns no tokens for an
             entity flagged as given.
     """
     # An exact type(...) == str comparison rejects unicode strings and str
     # subclasses, which would then be mistaken for a corpus object.
     # ``basestring`` only exists on Python 2, hence the fallback.
     try:
         stringTypes = basestring
     except NameError:
         stringTypes = str

     self.names = {}
     if isinstance(input, stringTypes):
         corpus = CorpusElements.loadCorpus(input, parse, tokenization)
     else:
         corpus = input

     for sentence in corpus.sentences:
         tokenTuples = self.prepareTokens(sentence.tokens)
         for entity in sentence.entities:
             # Only entities whose "given" attribute is the string "True"
             # contribute names.
             if entity.get("given") != "True":
                 continue
             tokens = self.getTokens(entity, tokenTuples)
             assert len(tokens) > 0, "given entity matched no tokens"
             # Register the name twice: as the token sequence and as one
             # token made of all parts concatenated without a separator.
             self.addName(tokens)
             self.addName(["".join(tokens)])
Exemplo n.º 3
0
def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False, errorMatrix=False):
    """Load a predicted corpus (and optionally a gold corpus) and compare them.

    Args:
        EvaluatorClass: Evaluator whose ``type`` attribute must be either
            "binary" or "multiclass"; it selects the class-id mapping.
        inputCorpusFile: Predicted corpus, passed to
            ``CorpusElements.loadCorpus``.
        goldCorpusFile: Gold corpus, passed to ``CorpusElements.loadCorpus``;
            when None no gold corpus is loaded and None is handed on.
        parse: Forwarded to ``CorpusElements.loadCorpus``.
        tokenization: Forwarded to ``CorpusElements.loadCorpus``.
        target: Forwarded unchanged to ``processCorpora``.
        entityMatchFunction: Forwarded unchanged to ``processCorpora``.
        removeIntersentenceInteractions: Forwarded to
            ``CorpusElements.loadCorpus`` for both corpora.
        errorMatrix: Forwarded to ``processCorpora`` as a keyword argument.

    Returns:
        Whatever ``processCorpora`` returns.

    Exits the process through ``sys.exit`` when ``EvaluatorClass.type`` is
    neither "binary" nor "multiclass".
    """
    print >> sys.stderr, "##### EvaluateInteractionXML #####"
    print >> sys.stderr, "Comparing input", inputCorpusFile, "to gold", goldCorpusFile

    # Class sets are used to convert the types to ids that the evaluator can use.
    # Each key gets its own IdSet built from its own dict literal, so the
    # entity and interaction sets never share state.
    classSets = {}
    if EvaluatorClass.type == "binary":
        for elementType in ("entity", "interaction"):
            classSets[elementType] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        negativeClassId = -1
    elif EvaluatorClass.type == "multiclass":
        for elementType in ("entity", "interaction"):
            classSets[elementType] = IdSet(idDict={"neg":1}, locked=False)
        negativeClassId = 1
    else:
        sys.exit("Unknown evaluator type")

    # Load the corpora; the gold corpus is optional.
    goldCorpusElements = None
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    # Compare the corpora and hand the result back to the caller.
    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction, errorMatrix=errorMatrix)
Exemplo n.º 4
0
def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False):
    """Load a predicted corpus (and optionally a gold corpus) and compare them.

    Args:
        EvaluatorClass: Evaluator whose ``type`` attribute must be either
            "binary" or "multiclass"; it selects the class-id mapping.
        inputCorpusFile: Predicted corpus, passed to
            ``CorpusElements.loadCorpus``.
        goldCorpusFile: Gold corpus, passed to ``CorpusElements.loadCorpus``;
            when None no gold corpus is loaded and None is handed on.
        parse: Forwarded to ``CorpusElements.loadCorpus``.
        tokenization: Forwarded to ``CorpusElements.loadCorpus``.
        target: Forwarded unchanged to ``processCorpora``.
        entityMatchFunction: Forwarded unchanged to ``processCorpora``.
        removeIntersentenceInteractions: Forwarded to
            ``CorpusElements.loadCorpus`` for both corpora.

    Returns:
        Whatever ``processCorpora`` returns.

    Exits the process through ``sys.exit`` when ``EvaluatorClass.type`` is
    neither "binary" nor "multiclass".
    """
    print >> sys.stderr, "##### EvaluateInteractionXML #####"
    print >> sys.stderr, "Comparing input", inputCorpusFile, "to gold", goldCorpusFile

    # Class sets are used to convert the types to ids that the evaluator can use.
    # Each key gets its own IdSet built from its own dict literal, so the
    # entity and interaction sets never share state.
    classSets = {}
    if EvaluatorClass.type == "binary":
        for elementType in ("entity", "interaction"):
            classSets[elementType] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        negativeClassId = -1
    elif EvaluatorClass.type == "multiclass":
        for elementType in ("entity", "interaction"):
            classSets[elementType] = IdSet(idDict={"neg":1}, locked=False)
        negativeClassId = 1
    else:
        sys.exit("Unknown evaluator type")

    # Load the corpora; the gold corpus is optional.
    goldCorpusElements = None
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    # Compare the corpora and hand the result back to the caller.
    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction)
Exemplo n.º 5
0
    import xml.etree.cElementTree as ET
except ImportError:
    import cElementTree as ET
import Utils.ElementTreeUtils as ETUtils
import Utils.InteractionXML.CorpusElements as CorpusElements

# Script entry point: parse the command line, load the corpus, prepare one
# empty output tree per fold and collect the document ids.
if __name__=="__main__":
    from optparse import OptionParser
    optparser = OptionParser(usage="%prog [options]\n")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory")
    optparser.add_option("-f", "--folds", type="int", default=10, dest="folds", help="X-fold cross validation")
    (options, args) = optparser.parse_args()

    # Load the corpus named by --input.
    corpusElements = CorpusElements.loadCorpus(options.input)

    # One empty <corpus> root per fold, each carrying a copy of the
    # attributes of the loaded corpus root.
    outputTrees = []
    for i in range(options.folds):
        newRoot = ET.Element("corpus")
        newRoot.attrib.update(corpusElements.rootElement.attrib)
        outputTrees.append(newRoot)

    print >> sys.stderr, "Reading document ids"
    # documentIds keeps the ids in corpus order; the set makes the duplicate
    # check constant-time instead of scanning the list for every document.
    documentIds = []
    seenDocumentIds = set()
    for document in corpusElements.documents:
        docId = document.attrib["id"]
        assert docId not in seenDocumentIds, "Duplicate document id: %s" % docId
        seenDocumentIds.add(docId)
        documentIds.append(docId)
Exemplo n.º 6
0
                         metavar="FILE")
    optparser.add_option("-o",
                         "--output",
                         default=None,
                         dest="output",
                         help="Output directory")
    optparser.add_option("-f",
                         "--folds",
                         type="int",
                         default=10,
                         dest="folds",
                         help="X-fold cross validation")
    (options, args) = optparser.parse_args()

    # Load the corpus named by --input.
    corpusElements = CorpusElements.loadCorpus(options.input)

    # One empty <corpus> root per fold, each carrying a copy of the
    # attributes of the loaded corpus root.
    outputTrees = []
    for i in range(options.folds):
        newRoot = ET.Element("corpus")
        newRoot.attrib.update(corpusElements.rootElement.attrib)
        outputTrees.append(newRoot)

    print >> sys.stderr, "Reading document ids"
    # documentIds keeps the ids in corpus order; the set makes the duplicate
    # check constant-time instead of scanning the list for every document.
    documentIds = []
    seenDocumentIds = set()
    for document in corpusElements.documents:
        docId = document.attrib["id"]
        assert docId not in seenDocumentIds, "Duplicate document id: %s" % docId
        seenDocumentIds.add(docId)
        documentIds.append(docId)