Пример #1
0
def prepareGoldForEvaluation(corpus,
                             sourceDir,
                             tempdir,
                             goldDir=None,
                             goldPackage=None):
    """Locate the evaluation gold data and unpack it if it is an archive.

    Any existing "gold" directory below ``tempdir`` is removed first. When
    ``goldDir`` is not given it is built from
    ``Settings.BIONLP_EVALUATOR_GOLD_DIR`` and ``goldPackage`` (which in turn
    defaults to ``Settings.EVALUATOR[corpus + "_DEVEL-gold"]``).

    Returns the path of the usable gold directory, or None when the setting
    is missing, the gold path does not exist, or ``hasGoldDocuments`` reports
    no gold documents for ``sourceDir``. Each failure is reported on stderr.
    """
    extractDir = os.path.join(tempdir, "gold")
    if os.path.exists(extractDir):
        shutil.rmtree(extractDir)
    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)
    if not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        # Return right away: the previous code fell through to
        # "None and ... or goldDir.endswith(...)", which called .endswith on
        # None because "and" binds tighter than "or".
        return None
    if goldDir.endswith((".tar.gz", ".tgz")):
        goldDir = Download.getTopDir(
            extractDir, Download.extractPackage(goldDir, extractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    return goldDir
Пример #2
0
 def install(self, destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
     """Download and compile the BLLIP parser and fetch the McClosky biomedical model.

     destDir defaults to Settings.DATAPATH + "/tools/BLLIP" and downloadDir to
     Settings.DATAPATH + "/tools/download". The installation is finished by
     Tool.finalizeInstall, which receives updateLocalSettings unchanged.

     NOTE(review): redownload is accepted but never used in this method.
     """
     url = Settings.URL["BLLIP_SOURCE"]
     if downloadDir is None:
         downloadDir = os.path.join(Settings.DATAPATH) + "/tools/download"
     if destDir is None:
         destDir = Settings.DATAPATH + "/tools/BLLIP"
     items = Download.downloadAndExtract(url, destDir, downloadDir + "/bllip.zip", None, False)
     print >> sys.stderr, "Installing BLLIP parser"
     Tool.testPrograms("BLLIP parser", ["make", "flex"], {"flex":"flex --version"})
     parserPath = Download.getTopDir(destDir, items)
     # The build runs inside the parser directory. The working directory is
     # process-wide state, so restore it even if a build step raises or the
     # user interrupts a long-running make.
     originalDir = os.getcwd()
     os.chdir(parserPath)
     try:
         print >> sys.stderr, "Compiling first-stage parser"
         subprocess.call("make", shell=True)
         print >> sys.stderr, "Compiling second-stage parser"
         subprocess.call("make reranker", shell=True)
     finally:
         os.chdir(originalDir)
     print >> sys.stderr, "Installing the McClosky biomedical parsing model"
     url = "http://bllip.cs.brown.edu/download/bioparsingmodel-rel1.tar.gz"
     Download.downloadAndExtract(url, destDir, downloadDir, None)
     bioModelDir = os.path.abspath(destDir + "/biomodel")
     # Check that everything works: each built program is paired with a test
     # command line that reads from /dev/null.
     Tool.finalizeInstall(["first-stage/PARSE/parseIt", "second-stage/programs/features/best-parses"], 
                          {"first-stage/PARSE/parseIt":"first-stage/PARSE/parseIt " + bioModelDir + "/parser/ < /dev/null",
                           "second-stage/programs/features/best-parses":"second-stage/programs/features/best-parses -l " + bioModelDir + "/reranker/features.gz " + bioModelDir + "/reranker/weights.gz < /dev/null"},
                          parserPath, {"BLLIP_PARSER_DIR":os.path.abspath(parserPath), 
                                       "MCCLOSKY_BIOPARSINGMODEL_DIR":bioModelDir}, updateLocalSettings)
Пример #3
0
def install(destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """Download and compile the BLLIP parser and fetch the McClosky biomedical model.

    destDir defaults to Settings.DATAPATH + "/tools/BLLIP" and downloadDir to
    Settings.DATAPATH + "/tools/download". The installation is finished by
    Tool.finalizeInstall, which receives updateLocalSettings unchanged.

    NOTE(review): redownload is accepted but never used in this function.
    """
    url = Settings.URL["BLLIP_SOURCE"]
    if downloadDir is None:
        downloadDir = os.path.join(Settings.DATAPATH) + "/tools/download"
    if destDir is None:
        destDir = Settings.DATAPATH + "/tools/BLLIP"
    items = Download.downloadAndExtract(url, destDir, downloadDir + "/bllip.zip", None, False)
    print >> sys.stderr, "Installing BLLIP parser"
    Tool.testPrograms("BLLIP parser", ["make", "flex"], {"flex":"flex --version"})
    parserPath = Download.getTopDir(destDir, items)
    # The build runs inside the parser directory. The working directory is
    # process-wide state, so restore it even if a build step raises or the
    # user interrupts a long-running make.
    originalDir = os.getcwd()
    os.chdir(parserPath)
    try:
        print >> sys.stderr, "Compiling first-stage parser"
        subprocess.call("make", shell=True)
        print >> sys.stderr, "Compiling second-stage parser"
        subprocess.call("make reranker", shell=True)
    finally:
        os.chdir(originalDir)
    print >> sys.stderr, "Installing the McClosky biomedical parsing model"
    url = "http://bllip.cs.brown.edu/download/bioparsingmodel-rel1.tar.gz"
    Download.downloadAndExtract(url, destDir, downloadDir, None)
    bioModelDir = os.path.abspath(destDir + "/biomodel")
    # Check that everything works: each built program is paired with a test
    # command line that reads from /dev/null.
    Tool.finalizeInstall(["first-stage/PARSE/parseIt", "second-stage/programs/features/best-parses"], 
                         {"first-stage/PARSE/parseIt":"first-stage/PARSE/parseIt " + bioModelDir + "/parser/ < /dev/null",
                          "second-stage/programs/features/best-parses":"second-stage/programs/features/best-parses -l " + bioModelDir + "/reranker/features.gz " + bioModelDir + "/reranker/weights.gz < /dev/null"},
                         parserPath, {"BLLIP_PARSER_DIR":os.path.abspath(parserPath), 
                                      "MCCLOSKY_BIOPARSINGMODEL_DIR":bioModelDir}, updateLocalSettings)
Пример #4
0
def install(destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
    """Download the Stanford Parser package and register its directory.

    destDir defaults to "tools/" and downloadDir to "tools/download/", both
    below Settings.DATAPATH. The unpacked top-level directory is handed to
    Tool.finalizeInstall as STANFORD_PARSER_DIR together with a java test
    command for the parser jar.

    NOTE(review): redownload is accepted but never used in this function.
    """
    print >> sys.stderr, "Installing Stanford Parser"
    if downloadDir is None:
        downloadDir = os.path.join(Settings.DATAPATH, "tools/download/")
    if destDir is None:
        destDir = os.path.join(Settings.DATAPATH, "tools/")
    extracted = Download.downloadAndExtract(Settings.URL["STANFORD_PARSER"], destDir, downloadDir)
    stanfordPath = Download.getTopDir(destDir, extracted)
    jarName = "stanford-parser.jar"
    # Test command that finalizeInstall associates with the jar.
    testCommands = {
        jarName: "java -cp stanford-parser.jar:slf4j-api.jar edu.stanford.nlp.trees.EnglishGrammaticalStructure"
    }
    localSettings = {"STANFORD_PARSER_DIR": stanfordPath}
    Tool.finalizeInstall([jarName], testCommands, stanfordPath, localSettings, updateLocalSettings)
Пример #5
0
 def install(self, destDir=None, downloadDir=None, redownload=False, updateLocalSettings=False):
     """Download the Stanford Parser package and register its directory.

     destDir defaults to "tools/" and downloadDir to "tools/download/", both
     below Settings.DATAPATH. The unpacked top-level directory is handed to
     Tool.finalizeInstall as STANFORD_PARSER_DIR together with a java test
     command for the parser jar.

     NOTE(review): redownload is accepted but never used in this method.
     """
     print >> sys.stderr, "Installing Stanford Parser"
     if downloadDir is None:
         downloadDir = os.path.join(Settings.DATAPATH, "tools/download/")
     if destDir is None:
         destDir = os.path.join(Settings.DATAPATH, "tools/")
     extracted = Download.downloadAndExtract(Settings.URL["STANFORD_PARSER"], destDir, downloadDir)
     stanfordPath = Download.getTopDir(destDir, extracted)
     jarName = "stanford-parser.jar"
     # Test command that finalizeInstall associates with the jar.
     testCommands = {
         jarName: "java -cp stanford-parser.jar edu.stanford.nlp.trees.EnglishGrammaticalStructure"
     }
     localSettings = {"STANFORD_PARSER_DIR": stanfordPath}
     Tool.finalizeInstall([jarName], testCommands, stanfordPath, localSettings, updateLocalSettings)
Пример #6
0
def install(destDir=None, downloadDir=None, redownload=False):
    """Download the BioNLP'11 evaluators and their devel gold data.

    Both destDir and downloadDir default to Settings.DATAPATH. For each of
    the corpora GE, BB, BI and CO the evaluator package is unpacked below
    "<destDir>/tools/evaluators/" and the devel set below
    "<destDir>/tools/evaluators/gold/<corpus>-devel".

    Returns a dict mapping "<corpus>_EVALUATOR" to the value returned by
    Download.getTopDir for that evaluator.

    NOTE(review): redownload is accepted but never used in this function.
    """
    print >> sys.stderr, "Installing BioNLP'11 evaluators"
    settings = {}
    if downloadDir is None:
        downloadDir = Settings.DATAPATH
    if destDir is None:
        destDir = Settings.DATAPATH
    evaluatorRoot = destDir + "/tools/evaluators/"
    for corpus in ("GE", "BB", "BI", "CO"):
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator"
        evaluatorItems = Download.downloadAndExtract(
            Settings.URL[corpus + "_EVALUATOR"],
            evaluatorRoot,
            downloadDir + "/tools/download/")
        settings[corpus + "_EVALUATOR"] = Download.getTopDir(evaluatorRoot, evaluatorItems)
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator gold data"
        develUrl = Settings.URL[corpus + "_DEVEL"]
        # Archive file name with the trailing ".tar.gz" cut off.
        packageName = os.path.basename(develUrl)[:-len(".tar.gz")]
        Download.downloadAndExtract(
            develUrl,
            destDir + "/tools/evaluators/gold/" + corpus + "-devel",
            downloadDir + "/corpora/BioNLP11-original/corpus/",
            packageName)
    return settings
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """Resolve the evaluator, a private copy of the input, and the gold data.

    The input (a directory or a ".tar.gz" package) is always copied or
    extracted to "<tempdir>/source" and passed through removeXLines. A gold
    path ending in ".tar.gz" is extracted to "<tempdir>/gold".

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. evaluatorDir is None when BIONLP_EVALUATOR_DIR is not
    configured. goldDir is None when BIONLP_EVALUATOR_GOLD_DIR is not
    configured, the gold path does not exist, or hasGoldDocuments finds no
    gold documents for the input. Problems are reported on stderr.
    """
    # Check evaluator
    if hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR,
                                    Settings.EVALUATOR[corpus])
    else:
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    # Check source data. The input is always duplicated into a temporary
    # directory: X-lines have to be removed from all tasks, and that is done
    # on the copy (presumably in place by removeXLines; confirm in its code).
    tempdir = tempfile.mkdtemp()
    workDir = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, workDir)
    else:
        shutil.copytree(sourceDir, workDir)
    sourceDir = workDir
    # Filter extra data
    removeXLines(sourceDir)
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR,
                                   Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Fall through with goldDir left as None instead of returning a
            # 2-tuple: every path now yields the same 4-tuple, and the
            # temporary directory created above reaches the caller.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        goldExtractDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(
            goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
Пример #8
0
def checkEvaluator(corpus, sourceDir, goldDir = None):
    """Resolve the evaluator, a private copy of the input, and the gold data.

    The input (a directory or a ".tar.gz" package) is always copied or
    extracted to "<tempdir>/source" and passed through removeXLines. A gold
    path ending in ".tar.gz" is extracted to "<tempdir>/gold".

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. evaluatorDir is None when BIONLP_EVALUATOR_DIR is not
    configured. goldDir is None when BIONLP_EVALUATOR_GOLD_DIR is not
    configured, the gold path does not exist, or hasGoldDocuments finds no
    gold documents for the input. Problems are reported on stderr.
    """
    # Check evaluator
    if hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    else:
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    # Check source data. The input is always duplicated into a temporary
    # directory: X-lines have to be removed from all tasks, and that is done
    # on the copy (presumably in place by removeXLines; confirm in its code).
    tempdir = tempfile.mkdtemp()
    workDir = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, workDir)
    else:
        shutil.copytree(sourceDir, workDir)
    sourceDir = workDir
    # Filter extra data
    removeXLines(sourceDir)
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
            print >> sys.stderr, "Found gold data directory", goldDir
        else:
            # Fall through with goldDir left as None instead of returning a
            # 2-tuple: every path now yields the same 4-tuple, and the
            # temporary directory created above reaches the caller.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        goldExtractDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
Пример #9
0
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """Resolve the evaluator directory, the input directory and the gold data.

    A ".tar.gz" input is extracted to "<tempdir>/source"; for corpus "GE09"
    a directory input is copied there instead. Any other input is used where
    it is. A gold path ending in ".tar.gz" is extracted to "<tempdir>/gold".

    Returns the tuple (evaluatorDir, sourceDir, goldDir, tempdir) with
    absolute paths. tempdir is None if no temporary directory was needed.
    evaluatorDir is None when BIONLP_EVALUATOR_DIR is not configured.
    goldDir is None when BIONLP_EVALUATOR_GOLD_DIR is not configured, the
    gold path does not exist, or hasGoldDocuments finds no gold documents
    for the input. Problems are reported on stderr.
    """
    # Check evaluator
    if hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    else:
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    # Check source data
    tempdir = None
    if sourceDir.endswith(".tar.gz"):
        tempdir = tempfile.mkdtemp()
        Download.extractPackage(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")
    elif corpus == "GE09":  # a2 files have to be renamed, so work on a copy
        tempdir = tempfile.mkdtemp()
        shutil.copytree(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")
    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Fall through with goldDir left as None instead of returning a
            # 2-tuple: every path now yields the same 4-tuple, and any
            # temporary directory created above reaches the caller.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        if tempdir is None:
            tempdir = tempfile.mkdtemp()
        goldExtractDir = os.path.join(tempdir, "gold")
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    if tempdir is not None:
        tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
Пример #10
0
def prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir=None, goldPackage=None):
    """Locate the evaluation gold data and unpack it if it is an archive.

    Any existing "gold" directory below ``tempdir`` is removed first. When
    ``goldDir`` is not given it is built from
    ``Settings.BIONLP_EVALUATOR_GOLD_DIR`` and ``goldPackage`` (which in turn
    defaults to ``Settings.EVALUATOR[corpus + "_DEVEL-gold"]``).

    Returns the path of the usable gold directory, or None when the setting
    is missing, the gold path does not exist, or ``hasGoldDocuments`` reports
    no gold documents for ``sourceDir``. Each failure is reported on stderr.
    """
    extractDir = os.path.join(tempdir, "gold")
    if os.path.exists(extractDir):
        shutil.rmtree(extractDir)
    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)
    if not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        # Return right away: the previous code fell through to
        # "None and ... or goldDir.endswith(...)", which called .endswith on
        # None because "and" binds tighter than "or".
        return None
    if goldDir.endswith((".tar.gz", ".tgz")):
        goldDir = Download.getTopDir(extractDir, Download.extractPackage(goldDir, extractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None
    return goldDir
Пример #11
0
def install(destDir=None, downloadDir=None, redownload=False):
    """Download the BioNLP'11 evaluators and their devel gold data.

    Both destDir and downloadDir default to Settings.DATAPATH. For each of
    the corpora GE11, BB11, BI11 and CO11 the evaluator package is unpacked
    below "<destDir>/tools/evaluators/" and the devel set below
    "<destDir>/tools/evaluators/gold/<corpus>-devel".

    Returns a dict mapping "<corpus>_EVALUATOR" to the value returned by
    Download.getTopDir for that evaluator.

    NOTE(review): redownload is accepted but never used in this function.
    """
    print >> sys.stderr, "Installing BioNLP'11 evaluators"
    settings = {}
    if downloadDir is None:
        downloadDir = Settings.DATAPATH
    if destDir is None:
        destDir = Settings.DATAPATH
    evaluatorRoot = destDir + "/tools/evaluators/"
    for corpus in ("GE11", "BB11", "BI11", "CO11"):
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator"
        evaluatorItems = Download.downloadAndExtract(
            Settings.URL[corpus + "_EVALUATOR"],
            evaluatorRoot,
            downloadDir + "/tools/download/")
        settings[corpus + "_EVALUATOR"] = Download.getTopDir(evaluatorRoot, evaluatorItems)
        print >> sys.stderr, "Installing BioNLP'11", corpus, "evaluator gold data"
        develUrl = Settings.URL[corpus + "_DEVEL"]
        # Archive file name with the trailing ".tar.gz" cut off.
        packageName = os.path.basename(develUrl)[:-len(".tar.gz")]
        Download.downloadAndExtract(
            develUrl,
            destDir + "/tools/evaluators/gold/" + corpus + "-devel",
            downloadDir + "/corpora/BioNLP11-original/corpus/",
            packageName)
    return settings