def prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir=None, goldPackage=None):
    """
    Locate the gold data used for evaluating sourceDir, unpacking it if needed.

    Any existing "gold" directory under tempdir is deleted first. If goldDir
    is not given, it is built from Settings.BIONLP_EVALUATOR_GOLD_DIR and
    goldPackage, which defaults to Settings.EVALUATOR[corpus + "_DEVEL-gold"].
    A gold path ending in ".tar.gz" or ".tgz" is extracted into tempdir/gold
    and replaced by the top directory of the extracted package.

    corpus -- corpus identifier, used in messages and the default package lookup
    sourceDir -- location of the documents to evaluate (passed to hasGoldDocuments)
    tempdir -- working directory whose "gold" subdirectory is replaced
    goldDir -- optional gold data directory or package path
    goldPackage -- optional package name under BIONLP_EVALUATOR_GOLD_DIR

    Returns the gold data path, or None if the setting is missing, the path
    does not exist, or hasGoldDocuments rejects it.
    """
    def report(*parts):
        # Same space-separated, newline-terminated text the print statement
        # would emit, written in a form that does not depend on print syntax.
        sys.stderr.write(" ".join(["%s" % (part,) for part in parts]) + "\n")

    goldTempDir = os.path.join(tempdir, "gold")
    if os.path.exists(goldTempDir):
        shutil.rmtree(goldTempDir)
    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            report(corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined")
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)
    if not os.path.exists(goldDir):
        report(corpus, "Evaluator gold data directory", goldDir, "does not exist")
        goldDir = None
    # Both suffixes are tested behind the None guard. The previous
    # "A and B or C" form called endswith on None when the path was missing.
    if goldDir is not None and goldDir.endswith((".tar.gz", ".tgz")):
        goldDir = Download.getTopDir(goldTempDir, Download.extractPackage(goldDir, goldTempDir))
        report("Uncompressed evaluation gold to", goldDir)
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        report("Evaluation input has no gold documents")
        goldDir = None
    return goldDir
def install(self, destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """
    Download and compile the BLLIP parser, then install the McClosky
    biomedical parsing model and verify the installation.

    destDir -- installation directory (default: Settings.DATAPATH + "/tools/BLLIP")
    downloadDir -- download directory (default: Settings.DATAPATH + "/tools/download")
    redownload -- accepted for interface compatibility; not used by this code
    updateLocalSettings -- passed through to Tool.finalizeInstall
    """
    url = Settings.URL["BLLIP_SOURCE"]
    if downloadDir is None:
        downloadDir = Settings.DATAPATH + "/tools/download"
    if destDir is None:
        destDir = Settings.DATAPATH + "/tools/BLLIP"
    items = Download.downloadAndExtract(url, destDir, downloadDir + "/bllip.zip", None, False)
    print >> sys.stderr, "Installing BLLIP parser"
    Tool.testPrograms("BLLIP parser", ["make", "flex"], {"flex":"flex --version"})
    parserPath = Download.getTopDir(destDir, items)
    # The build runs inside the parser directory. The previous working
    # directory is restored even if a build step raises, so a failure does
    # not leave the process in a different directory.
    cwd = os.getcwd()
    os.chdir(parserPath)
    try:
        print >> sys.stderr, "Compiling first-stage parser"
        subprocess.call("make", shell=True)
        print >> sys.stderr, "Compiling second-stage parser"
        subprocess.call("make reranker", shell=True)
    finally:
        os.chdir(cwd)
    print >> sys.stderr, "Installing the McClosky biomedical parsing model"
    url = "http://bllip.cs.brown.edu/download/bioparsingmodel-rel1.tar.gz"
    Download.downloadAndExtract(url, destDir, downloadDir, None)
    bioModelDir = os.path.abspath(destDir + "/biomodel")
    # Check that everything works
    firstStage = "first-stage/PARSE/parseIt"
    secondStage = "second-stage/programs/features/best-parses"
    testCommands = {
        firstStage: "first-stage/PARSE/parseIt " + bioModelDir + "/parser/ < /dev/null",
        secondStage: "second-stage/programs/features/best-parses -l " + bioModelDir + "/reranker/features.gz " + bioModelDir + "/reranker/weights.gz < /dev/null",
    }
    Tool.finalizeInstall([firstStage, secondStage],
                         testCommands,
                         parserPath,
                         {"BLLIP_PARSER_DIR":os.path.abspath(parserPath), "MCCLOSKY_BIOPARSINGMODEL_DIR":bioModelDir},
                         updateLocalSettings)
def install(destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """
    Download and compile the BLLIP parser, then install the McClosky
    biomedical parsing model and verify the installation.

    destDir -- installation directory (default: Settings.DATAPATH + "/tools/BLLIP")
    downloadDir -- download directory (default: Settings.DATAPATH + "/tools/download")
    redownload -- accepted for interface compatibility; not used by this code
    updateLocalSettings -- passed through to Tool.finalizeInstall
    """
    url = Settings.URL["BLLIP_SOURCE"]
    if downloadDir is None:
        downloadDir = Settings.DATAPATH + "/tools/download"
    if destDir is None:
        destDir = Settings.DATAPATH + "/tools/BLLIP"
    items = Download.downloadAndExtract(url, destDir, downloadDir + "/bllip.zip", None, False)
    print >> sys.stderr, "Installing BLLIP parser"
    Tool.testPrograms("BLLIP parser", ["make", "flex"], {"flex":"flex --version"})
    parserPath = Download.getTopDir(destDir, items)
    # Compile from within the parser directory, and always return to the
    # starting directory afterwards, including when a build step raises.
    cwd = os.getcwd()
    os.chdir(parserPath)
    try:
        print >> sys.stderr, "Compiling first-stage parser"
        subprocess.call("make", shell=True)
        print >> sys.stderr, "Compiling second-stage parser"
        subprocess.call("make reranker", shell=True)
    finally:
        os.chdir(cwd)
    print >> sys.stderr, "Installing the McClosky biomedical parsing model"
    url = "http://bllip.cs.brown.edu/download/bioparsingmodel-rel1.tar.gz"
    Download.downloadAndExtract(url, destDir, downloadDir, None)
    bioModelDir = os.path.abspath(destDir + "/biomodel")
    # Check that everything works
    parseItProgram = "first-stage/PARSE/parseIt"
    bestParsesProgram = "second-stage/programs/features/best-parses"
    testCommands = {
        parseItProgram: "first-stage/PARSE/parseIt " + bioModelDir + "/parser/ < /dev/null",
        bestParsesProgram: "second-stage/programs/features/best-parses -l " + bioModelDir + "/reranker/features.gz " + bioModelDir + "/reranker/weights.gz < /dev/null",
    }
    Tool.finalizeInstall([parseItProgram, bestParsesProgram],
                         testCommands,
                         parserPath,
                         {"BLLIP_PARSER_DIR":os.path.abspath(parserPath), "MCCLOSKY_BIOPARSINGMODEL_DIR":bioModelDir},
                         updateLocalSettings)
def install(destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """
    Download the Stanford Parser package and register its directory.

    destDir -- extraction directory (default: "tools/" under Settings.DATAPATH)
    downloadDir -- download directory (default: "tools/download/" under Settings.DATAPATH)
    redownload -- accepted but not used by this code
    updateLocalSettings -- passed through to Tool.finalizeInstall
    """
    print >> sys.stderr, "Installing Stanford Parser"
    if downloadDir is None:
        downloadDir = os.path.join(Settings.DATAPATH, "tools/download/")
    if destDir is None:
        destDir = os.path.join(Settings.DATAPATH, "tools/")
    extractedItems = Download.downloadAndExtract(Settings.URL["STANFORD_PARSER"], destDir, downloadDir)
    stanfordPath = Download.getTopDir(destDir, extractedItems)
    # The jar is both the file to look for and the key of its test command.
    jarFile = "stanford-parser.jar"
    testCommands = {jarFile: "java -cp stanford-parser.jar:slf4j-api.jar edu.stanford.nlp.trees.EnglishGrammaticalStructure"}
    newSettings = {"STANFORD_PARSER_DIR": stanfordPath}
    Tool.finalizeInstall([jarFile], testCommands, stanfordPath, newSettings, updateLocalSettings)
def install(self, destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """
    Download the Stanford Parser package and register its directory.

    destDir -- extraction directory (default: "tools/" under Settings.DATAPATH)
    downloadDir -- download directory (default: "tools/download/" under Settings.DATAPATH)
    redownload -- accepted but not used by this code
    updateLocalSettings -- passed through to Tool.finalizeInstall
    """
    print >> sys.stderr, "Installing Stanford Parser"
    if downloadDir is None:
        downloadDir = os.path.join(Settings.DATAPATH, "tools/download/")
    if destDir is None:
        destDir = os.path.join(Settings.DATAPATH, "tools/")
    extractedItems = Download.downloadAndExtract(Settings.URL["STANFORD_PARSER"], destDir, downloadDir)
    stanfordPath = Download.getTopDir(destDir, extractedItems)
    # The jar is both the file to look for and the key of its test command.
    jarFile = "stanford-parser.jar"
    testCommands = {jarFile: "java -cp stanford-parser.jar edu.stanford.nlp.trees.EnglishGrammaticalStructure"}
    newSettings = {"STANFORD_PARSER_DIR": stanfordPath}
    Tool.finalizeInstall([jarFile], testCommands, stanfordPath, newSettings, updateLocalSettings)
def install(destDir=None, downloadDir=None, redownload=False):
    """
    Download the BioNLP'11 evaluators and their devel gold data.

    For each of the corpora GE, BB, BI and CO, the evaluator package is
    extracted under destDir + "/tools/evaluators/" and the devel package
    under destDir + "/tools/evaluators/gold/<corpus>-devel".

    destDir -- installation root (default: Settings.DATAPATH)
    downloadDir -- download root (default: Settings.DATAPATH)
    redownload -- accepted but not used by this code

    Returns a dictionary mapping "<corpus>_EVALUATOR" to the top directory
    of the extracted evaluator package.
    """
    print >> sys.stderr, "Installing BioNLP'11 evaluators"
    settings = {}
    if downloadDir is None:
        downloadDir = Settings.DATAPATH
    if destDir is None:
        destDir = Settings.DATAPATH
    evaluatorsDir = destDir + "/tools/evaluators/"
    for corpus in ["GE", "BB", "BI", "CO"]:
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator"
        evaluatorKey = corpus + "_EVALUATOR"
        extractedItems = Download.downloadAndExtract(Settings.URL[evaluatorKey], evaluatorsDir, downloadDir + "/tools/download/")
        settings[evaluatorKey] = Download.getTopDir(evaluatorsDir, extractedItems)
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator gold data"
        develUrl = Settings.URL[corpus + "_DEVEL"]
        # Package name is the URL's file name with a ".tar.gz"-length suffix cut off.
        develPackage = os.path.basename(develUrl)[:-len(".tar.gz")]
        Download.downloadAndExtract(develUrl,
                                    destDir + "/tools/evaluators/gold/" + corpus + "-devel",
                                    downloadDir + "/corpora/BioNLP11-original/corpus/",
                                    develPackage)
    return settings
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """
    Resolve the evaluator, source and gold locations for an evaluation run.

    The source data is always placed in a fresh temporary directory: it is
    extracted there if it is a ".tar.gz" package and copied there otherwise.
    removeXLines is then applied to that copy. The temporary directory is
    not removed here; it is returned to the caller.

    corpus -- corpus identifier, used for Settings.EVALUATOR lookups and messages
    sourceDir -- directory or ".tar.gz" package with the data to evaluate
    goldDir -- optional gold data directory or ".tar.gz" package; if omitted it
               is looked up via Settings.BIONLP_EVALUATOR_GOLD_DIR

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. evaluatorDir is None when BIONLP_EVALUATOR_DIR is not
    defined. goldDir is None when no usable gold data is available.
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    # Check source data
    tempdir = tempfile.mkdtemp()
    tempSourceDir = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, tempSourceDir)
    else: #if corpus in ("GE09", "BB11", "BI11"):
        # GE09 a2 files have to be renamed and relation identifier "R" has to be replaced with "E" for the BB11 and BI11 relations.
        # X-lines have to be removed from all tasks
        shutil.copytree(sourceDir, tempSourceDir)
    sourceDir = tempSourceDir
    # Filter extra data
    removeXLines(sourceDir)
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Continue with goldDir left as None so this path returns the
            # same 4-tuple as all others. It used to return a 2-tuple and
            # drop the temporary directory created above.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        tempGoldDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(tempGoldDir, Download.extractPackage(goldDir, tempGoldDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def checkEvaluator(corpus, sourceDir, goldDir = None):
    """
    Resolve the evaluator, source and gold locations for an evaluation run.

    The source data is always placed in a fresh temporary directory: it is
    extracted there if it is a ".tar.gz" package and copied there otherwise.
    removeXLines is then applied to that copy. The temporary directory is
    not removed here; it is returned to the caller.

    corpus -- corpus identifier, used for Settings.EVALUATOR lookups and messages
    sourceDir -- directory or ".tar.gz" package with the data to evaluate
    goldDir -- optional gold data directory or ".tar.gz" package; if omitted it
               is looked up via Settings.BIONLP_EVALUATOR_GOLD_DIR

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. evaluatorDir is None when BIONLP_EVALUATOR_DIR is not
    defined. goldDir is None when no usable gold data is available.
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    # Check source data
    tempdir = tempfile.mkdtemp()
    tempSourceDir = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, tempSourceDir)
    else: #if corpus in ("GE09", "BB11", "BI11"):
        # GE09 a2 files have to be renamed and relation identifier "R" has to be replaced with "E" for the BB11 and BI11 relations.
        # X-lines have to be removed from all tasks
        shutil.copytree(sourceDir, tempSourceDir)
    sourceDir = tempSourceDir
    # Filter extra data
    removeXLines(sourceDir)
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
            print >> sys.stderr, "Found gold data directory", goldDir
        else:
            # Continue with goldDir left as None so this path returns the
            # same 4-tuple as all others. It used to return a 2-tuple and
            # drop the temporary directory created above.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        tempGoldDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(tempGoldDir, Download.extractPackage(goldDir, tempGoldDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """
    Resolve the evaluator, source and gold locations for an evaluation run.

    A ".tar.gz" source package is extracted into a temporary directory, and
    GE09 source data is copied into one. Other source directories are used
    in place. A ".tar.gz" gold package is extracted into the temporary
    directory, which is created at that point if it does not exist yet.
    The temporary directory is not removed here; it is returned to the caller.

    corpus -- corpus identifier, used for Settings.EVALUATOR lookups and messages
    sourceDir -- directory or ".tar.gz" package with the data to evaluate
    goldDir -- optional gold data directory or ".tar.gz" package; if omitted it
               is looked up via Settings.BIONLP_EVALUATOR_GOLD_DIR

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. evaluatorDir is None when BIONLP_EVALUATOR_DIR is not
    defined, goldDir is None when no usable gold data is available, and
    tempdir is None when no temporary directory was needed.
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    # Check source data
    tempdir = None
    if sourceDir.endswith(".tar.gz"):
        tempdir = tempfile.mkdtemp()
        Download.extractPackage(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")
    elif corpus == "GE09": # a2 files have to be renamed
        tempdir = tempfile.mkdtemp()
        shutil.copytree(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Continue with goldDir left as None so this path returns the
            # same 4-tuple as all others. It used to return a 2-tuple and
            # drop any temporary directory created above.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        if tempdir is None:
            tempdir = tempfile.mkdtemp()
        tempGoldDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(tempGoldDir, Download.extractPackage(goldDir, tempGoldDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    if tempdir is not None:
        tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir=None, goldPackage=None):
    """
    Locate the gold data used for evaluating sourceDir, unpacking it if needed.

    Any existing "gold" directory under tempdir is deleted first. If goldDir
    is not given, it is built from Settings.BIONLP_EVALUATOR_GOLD_DIR and
    goldPackage, which defaults to Settings.EVALUATOR[corpus + "_DEVEL-gold"].
    A gold path ending in ".tar.gz" or ".tgz" is extracted into tempdir/gold
    and replaced by the top directory of the extracted package.

    corpus -- corpus identifier, used in messages and the default package lookup
    sourceDir -- location of the documents to evaluate (passed to hasGoldDocuments)
    tempdir -- working directory whose "gold" subdirectory is replaced
    goldDir -- optional gold data directory or package path
    goldPackage -- optional package name under BIONLP_EVALUATOR_GOLD_DIR

    Returns the gold data path, or None if the setting is missing, the path
    does not exist, or hasGoldDocuments rejects it.
    """
    def report(*parts):
        # Same space-separated, newline-terminated text the print statement
        # would emit, written in a form that does not depend on print syntax.
        sys.stderr.write(" ".join(["%s" % (part,) for part in parts]) + "\n")

    extractionDir = os.path.join(tempdir, "gold")
    if os.path.exists(extractionDir):
        shutil.rmtree(extractionDir)
    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            report(corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined")
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)
    if not os.path.exists(goldDir):
        report(corpus, "Evaluator gold data directory", goldDir, "does not exist")
        goldDir = None
    # The None guard must cover both suffixes. With "A and B or C" the
    # ".tgz" test ran on None after the missing-path branch and raised
    # AttributeError.
    if goldDir is not None and goldDir.endswith((".tar.gz", ".tgz")):
        goldDir = Download.getTopDir(extractionDir, Download.extractPackage(goldDir, extractionDir))
        report("Uncompressed evaluation gold to", goldDir)
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        report("Evaluation input has no gold documents")
        goldDir = None
    return goldDir
def install(destDir=None, downloadDir=None, redownload=False):
    """
    Download the BioNLP'11 evaluators and their devel gold data.

    For each of the corpora GE11, BB11, BI11 and CO11, the evaluator package
    is extracted under destDir + "/tools/evaluators/" and the devel package
    under destDir + "/tools/evaluators/gold/<corpus>-devel".

    destDir -- installation root (default: Settings.DATAPATH)
    downloadDir -- download root (default: Settings.DATAPATH)
    redownload -- accepted but not used by this code

    Returns a dictionary mapping "<corpus>_EVALUATOR" to the top directory
    of the extracted evaluator package.
    """
    print >> sys.stderr, "Installing BioNLP'11 evaluators"
    settings = {}
    if downloadDir is None:
        downloadDir = Settings.DATAPATH
    if destDir is None:
        destDir = Settings.DATAPATH
    evaluatorsDir = destDir + "/tools/evaluators/"
    for corpus in ["GE11", "BB11", "BI11", "CO11"]:
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator"
        evaluatorKey = corpus + "_EVALUATOR"
        extractedItems = Download.downloadAndExtract(Settings.URL[evaluatorKey], evaluatorsDir, downloadDir + "/tools/download/")
        settings[evaluatorKey] = Download.getTopDir(evaluatorsDir, extractedItems)
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator gold data"
        develUrl = Settings.URL[corpus + "_DEVEL"]
        # Package name is the URL's file name with a ".tar.gz"-length suffix cut off.
        develPackage = os.path.basename(develUrl)[:-len(".tar.gz")]
        Download.downloadAndExtract(develUrl,
                                    destDir + "/tools/evaluators/gold/" + corpus + "-devel",
                                    downloadDir + "/corpora/BioNLP11-original/corpus/",
                                    develPackage)
    return settings