def checkEvaluator(corpus, sourceDir, goldDir = None):
    """
    Collect the locations needed to evaluate predictions for a corpus.

    The predictions are placed under "source" inside a fresh temporary
    directory (unpacked when sourceDir ends with ".tar.gz", copied
    otherwise) and passed through removeXLines. Gold data is then resolved
    with prepareGoldForEvaluation: first without an explicit package, and,
    if that yields None and Settings.EVALUATOR has a "<corpus>_TEST-gold"
    entry, once more with that entry as the package.

    The temporary directory is not removed here; presumably the caller
    cleans it up once evaluation is done.

    @param corpus: corpus identifier, used as a key into Settings.EVALUATOR
    @param sourceDir: directory or ".tar.gz" package holding the predictions
    @param goldDir: optional gold location forwarded to prepareGoldForEvaluation
    @return: (evaluatorDir, sourceDir, goldDir, tempdir) as absolute paths;
             evaluatorDir and goldDir are None when unavailable
    """
    # Locate the evaluator program
    evaluatorDir = None
    if hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    else:
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"

    # Always evaluate a private copy of the predictions, because X-lines
    # have to be removed from all tasks (GE09 a2 files also have to be
    # renamed, and the relation identifier "R" replaced with "E" for the
    # BB11 and BI11 relations).
    tempdir = tempfile.mkdtemp()
    workingSource = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, workingSource)
    else:
        shutil.copytree(sourceDir, workingSource)
    sourceDir = workingSource
    removeXLines(sourceDir) # filter extra data

    # Resolve the gold data, falling back to the test set gold
    print >> sys.stderr, "Using devel set gold for evaluation"
    goldDir = prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir)
    if goldDir is None:
        testGoldKey = corpus + "_TEST-gold"
        if Settings.EVALUATOR.has_key(testGoldKey):
            print >> sys.stderr, "Using test set gold for evaluation"
            goldDir = prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir, Settings.EVALUATOR[testGoldKey])

    # Report absolute paths only
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """
    Collect the locations needed to evaluate predictions for a corpus.

    The predictions are placed under "source" inside a fresh temporary
    directory (unpacked when sourceDir ends with ".tar.gz", copied
    otherwise) and passed through removeXLines. When goldDir is not given
    it is looked up from Settings.BIONLP_EVALUATOR_GOLD_DIR and
    Settings.EVALUATOR["<corpus>-gold"]; a ".tar.gz" gold package is
    unpacked under "gold" in the same temporary directory.

    The result is always a 4-tuple. A missing BIONLP_EVALUATOR_GOLD_DIR
    setting is reported as goldDir None rather than as a shorter tuple, so
    callers can always unpack four values and still receive the temporary
    directory that was created.

    @param corpus: corpus identifier, used as a key into Settings.EVALUATOR
    @param sourceDir: directory or ".tar.gz" package holding the predictions
    @param goldDir: optional gold directory or ".tar.gz" package
    @return: (evaluatorDir, sourceDir, goldDir, tempdir) as absolute paths;
             evaluatorDir and goldDir are None when unavailable
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])

    # Check source data. A private copy is always made, because X-lines
    # have to be removed from all tasks (GE09 a2 files also have to be
    # renamed, and the relation identifier "R" replaced with "E" for the
    # BB11 and BI11 relations).
    tempdir = tempfile.mkdtemp()
    workingSource = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, workingSource)
    else:
        shutil.copytree(sourceDir, workingSource)
    sourceDir = workingSource
    # Filter extra data
    removeXLines(sourceDir)

    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Fall through with goldDir None instead of returning early, so
            # the return shape stays the same and tempdir is not lost.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        goldExtractDir = os.path.join(tempdir, "gold")
        # NOTE(review): getTopDir presumably picks the top-level directory
        # of the unpacked package -- confirm against the Download module.
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None

    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def checkEvaluator(corpus, sourceDir, goldDir = None):
    """
    Collect the locations needed to evaluate predictions for a corpus.

    The predictions are placed under "source" inside a fresh temporary
    directory (unpacked when sourceDir ends with ".tar.gz", copied
    otherwise) and passed through removeXLines. When goldDir is not given
    it is looked up from Settings.BIONLP_EVALUATOR_GOLD_DIR and
    Settings.EVALUATOR["<corpus>-gold"], and the resolved location is
    logged; a ".tar.gz" gold package is unpacked under "gold" in the same
    temporary directory.

    The result is always a 4-tuple. A missing BIONLP_EVALUATOR_GOLD_DIR
    setting is reported as goldDir None rather than as a shorter tuple, so
    callers can always unpack four values and still receive the temporary
    directory that was created.

    @param corpus: corpus identifier, used as a key into Settings.EVALUATOR
    @param sourceDir: directory or ".tar.gz" package holding the predictions
    @param goldDir: optional gold directory or ".tar.gz" package
    @return: (evaluatorDir, sourceDir, goldDir, tempdir) as absolute paths;
             evaluatorDir and goldDir are None when unavailable
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])

    # Check source data. A private copy is always made, because X-lines
    # have to be removed from all tasks (GE09 a2 files also have to be
    # renamed, and the relation identifier "R" replaced with "E" for the
    # BB11 and BI11 relations).
    tempdir = tempfile.mkdtemp()
    workingSource = os.path.join(tempdir, "source")
    if sourceDir.endswith(".tar.gz"):
        Download.extractPackage(sourceDir, workingSource)
    else:
        shutil.copytree(sourceDir, workingSource)
    sourceDir = workingSource
    # Filter extra data
    removeXLines(sourceDir)

    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
            print >> sys.stderr, "Found gold data directory", goldDir
        else:
            # Fall through with goldDir None instead of returning early, so
            # the return shape stays the same and tempdir is not lost.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        goldExtractDir = os.path.join(tempdir, "gold")
        # NOTE(review): getTopDir presumably picks the top-level directory
        # of the unpacked package -- confirm against the Download module.
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None

    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir=None, goldPackage=None):
    """
    Resolve the gold data to evaluate against, or None if there is none.

    Any "gold" directory already present in tempdir is removed first. When
    goldDir is not given it is built from Settings.BIONLP_EVALUATOR_GOLD_DIR
    and goldPackage, which defaults to Settings.EVALUATOR["<corpus>_DEVEL-gold"].
    A ".tar.gz" or ".tgz" gold package is unpacked under "gold" in tempdir.
    Gold for which hasGoldDocuments(sourceDir, goldDir) is false is discarded.

    @param corpus: corpus identifier, used in messages and as a Settings key
    @param sourceDir: directory with the predictions to be evaluated
    @param tempdir: existing working directory that receives unpacked gold
    @param goldDir: optional gold directory or package; looked up if None
    @param goldPackage: optional package name under BIONLP_EVALUATOR_GOLD_DIR
    @return: the gold location to use, or None when no usable gold was found
    """
    def warn(*parts):
        # Writes the same text as "print >> sys.stderr, a, b, ..." but also
        # runs on Python 3, where that statement form fails at runtime.
        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

    goldExtractDir = os.path.join(tempdir, "gold")
    # Drop gold left behind by an earlier call that used the same tempdir
    if os.path.exists(goldExtractDir):
        shutil.rmtree(goldExtractDir)

    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            warn(corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined")
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)

    if not os.path.exists(goldDir):
        warn(corpus, "Evaluator gold data directory", goldDir, "does not exist")
        # Stop here: the previous "A and B or C" test went on to call
        # endswith() on None and raised AttributeError.
        return None

    if goldDir.endswith((".tar.gz", ".tgz")):
        # NOTE(review): getTopDir presumably picks the top-level directory
        # of the unpacked package -- confirm against the Download module.
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        warn("Uncompressed evaluation gold to", goldDir)
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        warn("Evaluation input has no gold documents")
        goldDir = None
    return goldDir
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """
    Collect the locations needed to evaluate predictions for a corpus.

    A ".tar.gz" source package is unpacked, and a GE09 source directory is
    copied, to "source" inside a fresh temporary directory; other source
    directories are used in place. When goldDir is not given it is looked
    up from Settings.BIONLP_EVALUATOR_GOLD_DIR and
    Settings.EVALUATOR["<corpus>-gold"]; a ".tar.gz" gold package is
    unpacked under "gold" in a temporary directory.

    The result is always a 4-tuple. A missing BIONLP_EVALUATOR_GOLD_DIR
    setting is reported as goldDir None rather than as a shorter tuple, so
    callers can always unpack four values and still receive any temporary
    directory that was created.

    @param corpus: corpus identifier, used as a key into Settings.EVALUATOR
    @param sourceDir: directory or ".tar.gz" package holding the predictions
    @param goldDir: optional gold directory or ".tar.gz" package
    @return: (evaluatorDir, sourceDir, goldDir, tempdir) as absolute paths;
             evaluatorDir, goldDir and tempdir are None when not available
             or not needed
    """
    # Check evaluator
    if not hasattr(Settings, "BIONLP_EVALUATOR_DIR"):
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None
    else:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])

    # Check source data
    tempdir = None
    if sourceDir.endswith(".tar.gz"):
        tempdir = tempfile.mkdtemp()
        Download.extractPackage(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")
    elif corpus == "GE09": # a2 files have to be renamed
        tempdir = tempfile.mkdtemp()
        shutil.copytree(sourceDir, os.path.join(tempdir, "source"))
        sourceDir = os.path.join(tempdir, "source")

    # Check gold data
    if goldDir is None:
        if hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, Settings.EVALUATOR[corpus + "-gold"])
        else:
            # Fall through with goldDir None instead of returning early, so
            # the return shape stays the same and tempdir is not lost.
            print >> sys.stderr, corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined"
    if goldDir is not None and not os.path.exists(goldDir):
        print >> sys.stderr, corpus, "Evaluator gold data directory", goldDir, "does not exist"
        goldDir = None
    if goldDir is not None and goldDir.endswith(".tar.gz"):
        # The source may have been used in place, so no temporary
        # directory exists yet.
        if tempdir is None:
            tempdir = tempfile.mkdtemp()
        goldExtractDir = os.path.join(tempdir, "gold")
        # NOTE(review): getTopDir presumably picks the top-level directory
        # of the unpacked package -- confirm against the Download module.
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        print >> sys.stderr, "Uncompressed evaluation gold to", goldDir
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        print >> sys.stderr, "Evaluation input has no gold documents"
        goldDir = None

    # Use absolute paths
    sourceDir = os.path.abspath(sourceDir)
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    if tempdir is not None:
        tempdir = os.path.abspath(tempdir)
    return evaluatorDir, sourceDir, goldDir, tempdir
def checkEvaluator(corpus, sourceDir, goldDir=None):
    """
    Set up the evaluator, prediction and gold locations for a corpus.

    A working copy of the predictions is created as "source" in a new
    temporary directory: a ".tar.gz" sourceDir is unpacked, anything else
    is copied. removeXLines is applied to that copy. Gold data comes from
    prepareGoldForEvaluation, called first without a package and then, if
    it returned None and Settings.EVALUATOR contains "<corpus>_TEST-gold",
    with that entry as the package.

    The temporary directory is returned rather than removed; presumably
    the caller deletes it after evaluation.

    @param corpus: corpus identifier, used as a key into Settings.EVALUATOR
    @param sourceDir: directory or ".tar.gz" package holding the predictions
    @param goldDir: optional gold location forwarded to prepareGoldForEvaluation
    @return: (evaluatorDir, sourceDir, goldDir, tempdir) as absolute paths;
             evaluatorDir and goldDir are None when unavailable
    """
    # Evaluator program
    evaluatorKnown = hasattr(Settings, "BIONLP_EVALUATOR_DIR")
    if evaluatorKnown:
        evaluatorDir = os.path.join(Settings.BIONLP_EVALUATOR_DIR, Settings.EVALUATOR[corpus])
    else:
        print >> sys.stderr, corpus, "BIONLP_EVALUATOR_DIR setting not defined"
        evaluatorDir = None

    # Source data: X-lines have to be removed from all tasks, so the input
    # is never evaluated in place. (GE09 a2 files also have to be renamed,
    # and the relation identifier "R" replaced with "E" for the BB11 and
    # BI11 relations.)
    tempdir = tempfile.mkdtemp()
    isPackage = sourceDir.endswith(".tar.gz")
    sourceCopy = os.path.join(tempdir, "source")
    if not isPackage:
        shutil.copytree(sourceDir, sourceCopy)
    else:
        Download.extractPackage(sourceDir, sourceCopy)
    sourceDir = sourceCopy
    # Filter extra data
    removeXLines(sourceDir)

    # Gold data: try the default package, then the test set package
    print >> sys.stderr, "Using devel set gold for evaluation"
    goldDir = prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir)
    if goldDir is None and Settings.EVALUATOR.has_key(corpus + "_TEST-gold"):
        print >> sys.stderr, "Using test set gold for evaluation"
        testPackage = Settings.EVALUATOR[corpus + "_TEST-gold"]
        goldDir = prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir, testPackage)

    # Absolute paths for everything that is available
    if evaluatorDir is not None:
        evaluatorDir = os.path.abspath(evaluatorDir)
    if goldDir is not None:
        goldDir = os.path.abspath(goldDir)
    return evaluatorDir, os.path.abspath(sourceDir), goldDir, os.path.abspath(tempdir)
def prepareGoldForEvaluation(corpus, sourceDir, tempdir, goldDir=None, goldPackage=None):
    """
    Resolve the gold data to evaluate against, or None if there is none.

    Any "gold" directory already present in tempdir is removed first. When
    goldDir is not given it is built from Settings.BIONLP_EVALUATOR_GOLD_DIR
    and goldPackage, which defaults to Settings.EVALUATOR["<corpus>_DEVEL-gold"].
    A ".tar.gz" or ".tgz" gold package is unpacked under "gold" in tempdir.
    Gold for which hasGoldDocuments(sourceDir, goldDir) is false is discarded.

    @param corpus: corpus identifier, used in messages and as a Settings key
    @param sourceDir: directory with the predictions to be evaluated
    @param tempdir: existing working directory that receives unpacked gold
    @param goldDir: optional gold directory or package; looked up if None
    @param goldPackage: optional package name under BIONLP_EVALUATOR_GOLD_DIR
    @return: the gold location to use, or None when no usable gold was found
    """
    def warn(*parts):
        # Writes the same text as "print >> sys.stderr, a, b, ..." but also
        # runs on Python 3, where that statement form fails at runtime.
        sys.stderr.write(" ".join(str(part) for part in parts) + "\n")

    goldExtractDir = os.path.join(tempdir, "gold")
    # Drop gold left behind by an earlier call that used the same tempdir
    if os.path.exists(goldExtractDir):
        shutil.rmtree(goldExtractDir)

    if goldDir is None:
        if not hasattr(Settings, "BIONLP_EVALUATOR_GOLD_DIR"):
            warn(corpus, "BIONLP_EVALUATOR_GOLD_DIR setting not defined")
            return None
        if goldPackage is None:
            goldPackage = Settings.EVALUATOR[corpus + "_DEVEL-gold"]
        goldDir = os.path.join(Settings.BIONLP_EVALUATOR_GOLD_DIR, goldPackage)

    if not os.path.exists(goldDir):
        warn(corpus, "Evaluator gold data directory", goldDir, "does not exist")
        # Stop here: the previous "A and B or C" test went on to call
        # endswith() on None and raised AttributeError.
        return None

    if goldDir.endswith((".tar.gz", ".tgz")):
        # NOTE(review): getTopDir presumably picks the top-level directory
        # of the unpacked package -- confirm against the Download module.
        goldDir = Download.getTopDir(goldExtractDir, Download.extractPackage(goldDir, goldExtractDir))
        warn("Uncompressed evaluation gold to", goldDir)
    if goldDir is not None and not hasGoldDocuments(sourceDir, goldDir):
        warn("Evaluation input has no gold documents")
        goldDir = None
    return goldDir