示例#1
0
def getSteps(step, omitSteps, mainSteps):
    """
    Build the main-step selector and the per-main-step substep settings.

    Both 'step' and 'omitSteps' are first passed through Parameters.get()
    together with 'mainSteps'. The results are used as dictionaries keyed by
    main step name (presumably one key per entry of 'mainSteps' -- this
    depends on Parameters.get(), which is defined elsewhere).

    Parameters:
        step -- where processing starts. After parsing, at most one main step
            may have a value other than None. True means "start from that
            main step, no particular substep"; a string names the substep to
            start from.
        omitSteps -- what to leave out. After parsing, a value of True means
            the whole main step is omitted; any other value is returned
            unchanged as that main step's substeps to omit.
        mainSteps -- handed to Parameters.get() and to StepSelector.

    Returns a tuple (selector, fromSubStep, omitSubSteps):
        selector -- StepSelector(mainSteps, fromStep=..., omitSteps=...)
        fromSubStep -- dict: main step -> substep to start from, or None
        omitSubSteps -- dict: main step -> substeps to omit, or None when the
            main step is omitted as a whole (or nothing was given for it)

    Raises AssertionError if more than one main step is given as the starting
    point, or if a starting substep is neither True nor a string.
    """
    # Determine substep to start from, for the main step from which processing starts
    step = Parameters.get(step, mainSteps)
    fromMainStep = None
    fromSubStep = {} # The substep to start from, for the main step to start from
    for mainStep in step.keys():
        value = step[mainStep]
        fromSubStep[mainStep] = value # the sub step to start from
        if value is None:
            continue # processing does not start from this main step
        # processing can start from one place only
        assert fromMainStep is None, \
            "Processing can start from one main step only, got both '%s' and '%s'" % (fromMainStep, mainStep)
        fromMainStep = mainStep
        if value == True:
            # Start from the main step as a whole, with no particular substep
            fromSubStep[mainStep] = None
        else:
            # no list allowed, processing can start from one place only
            # (isinstance also accepts subclasses of str/unicode)
            assert isinstance(value, types.StringTypes), \
                "Substep for main step '%s' must be a single string, got %r" % (mainStep, value)
    # Determine steps to omit
    omitSubSteps = {} # Substeps to skip, per main step (None for a main step that is skipped entirely)
    omitMainSteps = [] # Main steps that are skipped entirely
    omitSteps = Parameters.get(omitSteps, mainSteps)
    for mainStep in omitSteps.keys():
        value = omitSteps[mainStep]
        if value == True:
            # The whole main step is omitted, so it has no substep list of its own
            omitMainSteps.append(mainStep)
            omitSubSteps[mainStep] = None
        else:
            omitSubSteps[mainStep] = value
    # Initialize main step selector
    if fromMainStep is not None:
        if fromSubStep[fromMainStep] is not None:
            print >> sys.stderr, "Starting process from step", fromMainStep + ", substep", fromSubStep[fromMainStep]
        else:
            print >> sys.stderr, "Starting process from step", fromMainStep
    selector = StepSelector(mainSteps, fromStep=fromMainStep, omitSteps=omitMainSteps)
    return selector, fromSubStep, omitSubSteps
示例#2
0
# Register command line options on 'optparser' (created above this excerpt)
# and parse the command line.
optparser.add_option("-c", "--corpus", default="PMC11", dest="corpus", help="corpus name for preprocessing")
optparser.add_option("-o", "--output", default=None, dest="output", help="output directory")
optparser.add_option("-w", "--workdir", default=None, dest="workdir", help="work directory")
optparser.add_option("-m", "--model", default=None, dest="model", help="model file or directory")
optparser.add_option("-p", "--parse", default="split-McClosky", dest="parse", help="Parse XML element name")
optparser.add_option("--eventTag", default="GE", dest="eventTag", help="")
optparser.add_option("--step", default=None, dest="step", help="")
optparser.add_option("--detectorStep", default=None, dest="detectorStep", help="")
optparser.add_option("--omitPreprocessorSteps", default=None, dest="omitPreprocessorSteps", help="")
optparser.add_option("--csc", default="", dest="csc", help="")
optparser.add_option("--noLog", default=False, action="store_true", dest="noLog", help="")
optparser.add_option("--clearAll", default=False, action="store_true", dest="clearAll", help="Delete all files")
optparser.add_option("--debug", default=False, action="store_true", dest="debug", help="")
(options, args) = optparser.parse_args()

# Selector over the two main steps; the value of --step is passed as 'fromStep'
selector = StepSelector(["PREPROCESS", "EVENTS"], fromStep=options.step)

# Get the input stem, which will be used for naming the output files
# NOTE(review): the option that defines options.input is not among the options
# visible in this excerpt -- presumably it is registered earlier; confirm.
options.input = options.input.rstrip("/") # drop trailing slashes so the last path component is not empty
if options.output == None:
    # No output directory given: the stem is the input path itself
    INPUT_TAG = options.input
else:
    # Create the output directory if needed; the stem is the last component
    # of the input path, placed inside the output directory
    if not os.path.exists(options.output):
        os.makedirs(options.output)
    INPUT_TAG = os.path.join(options.output, options.input.rsplit("/", 1)[-1])
#if os.path.isfile(options.input):
#    if INPUT_TAG.endswith(".tar.gz"):
#        INPUT_TAG = INPUT_TAG[:-len(".tar.gz")]
# Create (or truncate) an empty "<stem>-STARTED" marker file
open(INPUT_TAG+"-STARTED", "w").close() # Mark process status
        
# Start logging
示例#3
0
# Remaining command line options, registered on 'optparser' (created above this excerpt)
optparser.add_option("--clearAll", default=False, action="store_true", dest="clearAll", help="Delete all files")
optparser.add_option("--debug", default=False, action="store_true", dest="debug", help="More verbose output")
optparser.add_option("-u", "--unmerging", default=False, action="store_true", dest="unmerging", help="SVM unmerging")
optparser.add_option("-m", "--modifiers", default=False, action="store_true", dest="modifiers", help="Train model for modifier detection")
# Task 3: speculation and negation models and their id file stem.
# The defaults point into the current user's home directory.
optparser.add_option("--speculationModel", default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/speculation-models/model-c_150000"), dest="speculationModel", help="SVM-multiclass speculation model")
optparser.add_option("--negationModel", default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/negation-models/model-c_16000"), dest="negationModel", help="SVM-multiclass negation model")
optparser.add_option("--task3Ids", default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/genia-task3-ids"), dest="task3Ids", help="Speculation & negation SVM example class and feature id file stem (files = STEM.class_names and STEM.feature_names)")
(options, args) = optparser.parse_args()

# --step may be given as "MAINSTEP" or "MAINSTEP.SUBSTEP". The main step is
# passed to the selector; the substep is stored in detectorStep under that
# main step.
step = options.step
detectorStep = {"TRAIN":None, "DEVEL":None, "EMPTY":None, "TEST":None} # substep per main step, None = no substep given
if options.step != None and "." in options.step:
    # Only the first two "."-separated parts are used; anything after a
    # second "." is discarded.
    step = options.step.split(".")[0]
    detectorStep[step] = options.step.split(".")[1]
selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step)

# Check options
if options.classify:
    # Classification with existing models overrides the mode option
    print "Classifying with existing models"
    options.mode = "POST-GRID"
assert options.output != None # an output location is mandatory
assert options.task in ["OLD.1", "OLD.2", "CO", "REL", "GE", "GE.1", "GE.2", "EPI", "ID", "BB"]
# Keep the task id as given; options.task is reduced to the part before "."
fullTaskId = options.task
subTask = 2 # used when the task id has no ".N" suffix
if "." in options.task:
    options.task, subTask = options.task.split(".")
    subTask = int(subTask)
#dataPath = "/home/jari/biotext/BioNLP2011/data/main-tasks/"
# The REL task reads its data from the supporting-tasks directory
if options.task == "REL":
    dataPath = os.path.expanduser("~/biotext/BioNLP2011/data/supporting-tasks/REL/")
示例#4
0
    # Remaining command line options, registered on 'optparser' (created above this excerpt)
    optparser.add_option("--noLog", default=False, action="store_true", dest="noLog", help="")
    optparser.add_option("--noTestSet", default=False, action="store_true", dest="noTestSet", help="")
    optparser.add_option("--clearAll", default=False, action="store_true", dest="clearAll", help="Delete all files")
    optparser.add_option("--debug", default=False, action="store_true", dest="debug", help="More verbose output")
    (options, args) = optparser.parse_args()
    
    # Validate options
    assert options.output != None # an output location is mandatory
    assert options.task in ["GE00", "GE09.1", "GE09.2", "GE", "GE.1", "GE.2", "EPI", "ID", "BB", "BI", "CO", "REL", "REN"]
    
    # --step may be given as "MAINSTEP" or "MAINSTEP.SUBSTEP". The main step
    # is passed to the selector; the substep is stored in detectorStep under
    # that main step.
    step = options.step
    detectorStep = {"TRAIN":None, "DEVEL":None, "EMPTY":None, "TEST":None} # substep per main step, None = no substep given
    if options.step != None and "." in options.step:
        # Only the first two "."-separated parts are used; anything after a
        # second "." is discarded.
        step = options.step.split(".")[0]
        detectorStep[step] = options.step.split(".")[1]
    selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step)

    # Keep the task id as given; options.task is reduced to the part before "."
    fullTaskId = options.task
    subTask = 2 # used when the task id has no ".N" suffix
    if "." in options.task:
        options.task, subTask = options.task.split(".")
        subTask = int(subTask)
    # Default input files: <dataPath><task>/<task>-<set><extraTag>.xml
    # NOTE(review): this check is always true here -- the assert above and the
    # '"." in options.task' test both already require options.task to be a
    # non-None value.
    if options.task != None:
        dataPath = os.path.expanduser("~/biotext/BioNLP2011/data/main-tasks/")
        trainFile = dataPath + options.task + "/" + options.task + "-train" + options.extraTag + ".xml"
        develFile = dataPath + options.task + "/" + options.task + "-devel" + options.extraTag + ".xml"
        testFile = dataPath + options.task + "/" + options.task + "-test" + options.extraTag + ".xml" # NOTE(review): the original comment said "test set never uses extratag", but options.extraTag IS appended here -- confirm which is intended
    # Optional overrides for input files
    if options.trainFile != None: trainFile = options.trainFile
    if options.develFile != None: develFile = options.develFile
    if options.testFile != None: testFile = options.testFile
示例#5
0
    help=
    "Speculation & negation SVM example class and feature id file stem (files = STEM.class_names and STEM.feature_names)"
)
# Parse the command line using the options registered above this excerpt
(options, args) = optparser.parse_args()

# --step may be given as "MAINSTEP" or "MAINSTEP.SUBSTEP". The main step is
# passed to the selector; the substep is stored in detectorStep under that
# main step.
step = options.step
detectorStep = {
    "TRAIN": None,
    "DEVEL": None,
    "EMPTY": None,
    "TEST": None
}  # substep per main step, None = no substep given
if options.step != None and "." in options.step:
    # Only the first two "."-separated parts are used; anything after a
    # second "." is discarded.
    step = options.step.split(".")[0]
    detectorStep[step] = options.step.split(".")[1]
selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step)

# Check options
if options.classify:
    # Classification with existing models overrides the mode option
    print "Classifying with existing models"
    options.mode = "POST-GRID"
assert options.output != None  # an output location is mandatory
assert options.task in [
    "OLD.1", "OLD.2", "CO", "REL", "GE", "GE.1", "GE.2", "EPI", "ID", "BB"
]
# Keep the task id as given; options.task is reduced to the part before "."
fullTaskId = options.task
subTask = 2  # used when the task id has no ".N" suffix
if "." in options.task:
    options.task, subTask = options.task.split(".")
    subTask = int(subTask)
#dataPath = "/home/jari/biotext/BioNLP2011/data/main-tasks/"