def fromXML(self, input, parse, tokenization=None):
    """
    Rebuild self.names from the entities of a corpus that are flagged as names.

    input -- either a string, which is handed to CorpusElements.loadCorpus
             together with parse and tokenization, or an already loaded
             corpus object that exposes a "sentences" sequence
    parse -- forwarded to CorpusElements.loadCorpus (string input only)
    tokenization -- forwarded to CorpusElements.loadCorpus (string input only)

    self.names is reset to an empty dictionary first. Then, for every entity
    whose "isName" attribute equals "True", the tokens returned by
    self.getTokens are passed to self.addName twice: once as they are and
    once concatenated into a single-element list.
    """
    self.names = {}
    # isinstance with StringTypes also accepts unicode strings and str
    # subclasses; the exact type comparison used previously sent those down
    # the "already loaded corpus" branch, where they failed on ".sentences".
    if isinstance(input, types.StringTypes):
        corpus = CorpusElements.loadCorpus(input, parse, tokenization)
    else:
        corpus = input
    for sentence in corpus.sentences:
        tokenTuples = self.prepareTokens(sentence.tokens)
        for entity in sentence.entities:
            if entity.get("isName") != "True":
                continue
            tokens = self.getTokens(entity, tokenTuples)
            assert len(tokens) > 0, "name entity has no tokens"
            self.addName(tokens)
            # Also register the name with all of its tokens glued together
            self.addName(["".join(tokens)])
def fromXML(self, input, parse, tokenization=None):
    """
    Rebuild self.names from the entities of a corpus that are marked as given.

    input -- either a string, which is handed to CorpusElements.loadCorpus
             together with parse and tokenization, or an already loaded
             corpus object that exposes a "sentences" sequence
    parse -- forwarded to CorpusElements.loadCorpus (string input only)
    tokenization -- forwarded to CorpusElements.loadCorpus (string input only)

    self.names is reset to an empty dictionary first. Then, for every entity
    whose "given" attribute equals "True", the tokens returned by
    self.getTokens are passed to self.addName twice: once as they are and
    once concatenated into a single-element list.
    """
    self.names = {}
    # isinstance with StringTypes also accepts unicode strings and str
    # subclasses; the exact type comparison used previously sent those down
    # the "already loaded corpus" branch, where they failed on ".sentences".
    if isinstance(input, types.StringTypes):
        corpus = CorpusElements.loadCorpus(input, parse, tokenization)
    else:
        corpus = input
    for sentence in corpus.sentences:
        tokenTuples = self.prepareTokens(sentence.tokens)
        for entity in sentence.entities:
            if entity.get("given") != "True":
                continue
            tokens = self.getTokens(entity, tokenTuples)
            assert len(tokens) > 0, "given entity has no tokens"
            self.addName(tokens)
            # Also register the name with all of its tokens glued together
            self.addName(["".join(tokens)])
def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False, errorMatrix=False):
    """
    Load a predicted corpus (and optionally a gold corpus) and compare them.

    EvaluatorClass -- evaluator class; its "type" attribute must be "binary"
                      or "multiclass", otherwise the process exits through
                      sys.exit
    inputCorpusFile -- predicted corpus, passed to CorpusElements.loadCorpus
    goldCorpusFile -- gold corpus, passed to CorpusElements.loadCorpus; when
                      None, no gold corpus is loaded and None is handed to
                      processCorpora in its place
    parse, tokenization, removeIntersentenceInteractions -- forwarded to
                      CorpusElements.loadCorpus for both corpora
    target, entityMatchFunction, errorMatrix -- forwarded to processCorpora

    Returns whatever processCorpora returns.
    """
    print >> sys.stderr, "##### EvaluateInteractionXML #####"
    print >> sys.stderr, "Comparing input", inputCorpusFile, "to gold", goldCorpusFile

    # Class sets are used to convert the types to ids that the evaluator can use
    classSets = {}
    evaluatorType = EvaluatorClass.type
    if evaluatorType == "binary":
        classSets["entity"] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        classSets["interaction"] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        negativeClassId = -1
    elif evaluatorType == "multiclass":
        # Unlocked id sets, each seeded with only the negative class
        classSets["entity"] = IdSet(idDict={"neg":1}, locked=False)
        classSets["interaction"] = IdSet(idDict={"neg":1}, locked=False)
        negativeClassId = 1
    else:
        sys.exit("Unknown evaluator type")

    # Load corpus and make sentence graphs
    goldCorpusElements = None
    # Identity test: None is a singleton, so "is not" is the correct check
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    # Compare the corpora and print results on screen
    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction, errorMatrix=errorMatrix)
def run(EvaluatorClass, inputCorpusFile, goldCorpusFile, parse, tokenization=None, target="both", entityMatchFunction=compareEntitiesSimple, removeIntersentenceInteractions=False):
    """
    Load a predicted corpus (and optionally a gold corpus) and compare them.

    EvaluatorClass -- evaluator class; its "type" attribute must be "binary"
                      or "multiclass", otherwise the process exits through
                      sys.exit
    inputCorpusFile -- predicted corpus, passed to CorpusElements.loadCorpus
    goldCorpusFile -- gold corpus, passed to CorpusElements.loadCorpus; when
                      None, no gold corpus is loaded and None is handed to
                      processCorpora in its place
    parse, tokenization, removeIntersentenceInteractions -- forwarded to
                      CorpusElements.loadCorpus for both corpora
    target, entityMatchFunction -- forwarded to processCorpora

    Returns whatever processCorpora returns.
    """
    print >> sys.stderr, "##### EvaluateInteractionXML #####"
    print >> sys.stderr, "Comparing input", inputCorpusFile, "to gold", goldCorpusFile

    # Class sets are used to convert the types to ids that the evaluator can use
    classSets = {}
    evaluatorType = EvaluatorClass.type
    if evaluatorType == "binary":
        classSets["entity"] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        classSets["interaction"] = IdSet(idDict={"True":1,"False":-1}, locked=True)
        negativeClassId = -1
    elif evaluatorType == "multiclass":
        # Unlocked id sets, each seeded with only the negative class
        classSets["entity"] = IdSet(idDict={"neg":1}, locked=False)
        classSets["interaction"] = IdSet(idDict={"neg":1}, locked=False)
        negativeClassId = 1
    else:
        sys.exit("Unknown evaluator type")

    # Load corpus and make sentence graphs
    goldCorpusElements = None
    # Identity test: None is a singleton, so "is not" is the correct check
    if goldCorpusFile is not None:
        goldCorpusElements = CorpusElements.loadCorpus(goldCorpusFile, parse, tokenization, removeIntersentenceInteractions)
    predictedCorpusElements = CorpusElements.loadCorpus(inputCorpusFile, parse, tokenization, removeIntersentenceInteractions)

    # Compare the corpora and print results on screen
    return processCorpora(EvaluatorClass, predictedCorpusElements, goldCorpusElements, target, classSets, negativeClassId, entityMatchFunction)
import xml.etree.cElementTree as ET except ImportError: import cElementTree as ET import Utils.ElementTreeUtils as ETUtils import Utils.InteractionXML.CorpusElements as CorpusElements if __name__=="__main__": from optparse import OptionParser optparser = OptionParser(usage="%prog [options]\n") optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE") optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory") optparser.add_option("-f", "--folds", type="int", default=10, dest="folds", help="X-fold cross validation") (options, args) = optparser.parse_args() # Load corpus and make sentence graphs corpusElements = CorpusElements.loadCorpus(options.input) outputTrees = [] for i in range(options.folds): newRoot = ET.Element("corpus") for key in corpusElements.rootElement.attrib.keys(): newRoot.attrib[key] = corpusElements.rootElement.attrib[key] outputTrees.append(newRoot) print >> sys.stderr, "Reading document ids" documentIds = [] for document in corpusElements.documents: docId = document.attrib["id"] assert( not docId in documentIds ) documentIds.append(docId)
metavar="FILE") optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory") optparser.add_option("-f", "--folds", type="int", default=10, dest="folds", help="X-fold cross validation") (options, args) = optparser.parse_args() # Load corpus and make sentence graphs corpusElements = CorpusElements.loadCorpus(options.input) outputTrees = [] for i in range(options.folds): newRoot = ET.Element("corpus") for key in corpusElements.rootElement.attrib.keys(): newRoot.attrib[key] = corpusElements.rootElement.attrib[key] outputTrees.append(newRoot) print >> sys.stderr, "Reading document ids" documentIds = [] for document in corpusElements.documents: docId = document.attrib["id"] assert (not docId in documentIds) documentIds.append(docId)