Python Preprocessor.stepArgs примеры использования

Язык программирования: Python

Пространство имен/Пакет: Detectors.Preprocessor

Класс/Тип: Preprocessor

Метод/Функция: stepArgs

Примеров на hotexamples.com: 5

Python Preprocessor.stepArgs - 5 примеров найдено. Это лучшие примеры Python кода для Detectors.Preprocessor.Preprocessor.stepArgs, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

Preprocessor(11)

process(9)

setArgForAllSteps(6)

stepArgs(3)

getStep(2)

getOutputPath(1)

getParameters(1)

hasStep(1)

compressIntermediateFiles(1)

intermediateFilesAtSource(1)

getHelpString(1)

requireEntitiesForParsing(1)

debug(1)

setIntermediateFile(1)

setNoIntermediateFiles(1)

source(1)

insertStep(1)

Пример #1

Показать файл

Файл: convertDDI13.py Проект: DUT-LiuYang/TEES

def parseXML(xml, intermediateFileDir, debug=False):
    """
    Run a Preprocessor over `xml` with a fixed list of steps omitted.

    Before processing, the "debug" argument of every step is set to `debug`
    and the "requireEntities" argument of the "PARSE" step is set to False.

    xml -- passed as the first argument to Preprocessor.process
    intermediateFileDir -- passed as the second argument to Preprocessor.process
    debug -- value stored as the "debug" argument of all steps
    """
    # Entity name splitting is omitted as this data may be used for predicting entities
    skippedSteps = ["CONVERT", "SPLIT-SENTENCES", "NER", "SPLIT-NAMES", "DIVIDE-SETS"]
    pipeline = Preprocessor()
    pipeline.setArgForAllSteps("debug", debug)
    parseArgs = pipeline.stepArgs("PARSE")
    parseArgs["requireEntities"] = False
    # NOTE(review): earlier variants of this call restricted the run with
    # fromStep/toStep instead of (or in addition to) omitSteps.
    pipeline.process(xml, intermediateFileDir, omitSteps=skippedSteps)

Пример #2

Показать файл

def parseXML(xml, intermediateFileDir, debug=False):
    """
    Process `xml` with a Preprocessor, leaving out a fixed set of steps.

    The "debug" argument is applied to all steps, and the "PARSE" step gets
    its "requireEntities" argument set to False, before process() is called.

    xml -- first positional argument for Preprocessor.process
    intermediateFileDir -- second positional argument for Preprocessor.process
    debug -- value assigned to the "debug" argument of every step
    """
    runner = Preprocessor()
    runner.setArgForAllSteps("debug", debug)
    argsForParse = runner.stepArgs("PARSE")
    argsForParse["requireEntities"] = False
    # Entity name splitting is omitted as this data may be used for predicting entities
    stepsToOmit = [
        "CONVERT",
        "SPLIT-SENTENCES",
        "NER",
        "SPLIT-NAMES",
        "DIVIDE-SETS",
    ]
    runner.process(xml, intermediateFileDir, omitSteps=stepsToOmit)

Пример #3

Показать файл

Файл: convertBioNLP.py Проект: sbnlp/2017BioNLPEvaluation

def parseXML(xml,
             outStem,
             intermediateFiles=True,
             debug=False,
             bbResources=False):
    """
    Process `xml` with a Preprocessor, omitting the "NER" and "DIVIDE-SETS" steps.

    xml -- first positional argument for Preprocessor.process
    outStem -- second positional argument for Preprocessor.process
    intermediateFiles -- when false, setNoIntermediateFiles() is called on
        the preprocessor before processing
    debug -- value stored as the "debug" argument of all steps
    bbResources -- when true, a "BB_RESOURCES" step (insertResources.process,
        output "bb-resources.xml") is inserted at index 5
    """
    pipeline = Preprocessor()
    # The extra step is inserted first, so the setArgForAllSteps call below
    # runs after it has been added.
    if bbResources:
        pipeline.insertStep(5, "BB_RESOURCES", insertResources.process, {}, "bb-resources.xml")
    pipeline.setArgForAllSteps("debug", debug)
    parseArgs = pipeline.stepArgs("PARSE")
    parseArgs["requireEntities"] = False
    if not intermediateFiles:
        pipeline.setNoIntermediateFiles()
    skippedSteps = ["NER", "DIVIDE-SETS"]
    pipeline.process(xml, outStem, omitSteps=skippedSteps)

Пример #4

Показать файл

Файл: convertSemEval2010Task8.py Проект: jbjorne/TEES

def convert(inPath, outDir, corpusId, directed, negatives, preprocess, preprocessorParameters=None, debug=False, clear=False, constParser="BLLIP-BIO", depParser="STANFORD-CONVERT", logging=True):
    assert negatives in ("INCLUDE", "SKIP", "REVERSE_POS")
    # Download the corpus if needed
    if inPath == None:
        if not hasattr(Settings, "SE10T8_CORPUS"):
            SemEval2010Task8Tools.install()
        inPath = Settings.SE10T8_CORPUS
    assert os.path.exists(inPath)
    # Prepare the output directory
    if not os.path.exists(outDir):
        print "Making output directory", outDir
        os.makedirs(outDir)
    elif clear:
        print "Removing output directory", outDir
        shutil.rmtree(outDir)
    # Start logging
    if logging:
        Stream.openLog(os.path.join(outDir, "log.txt"), clear=clear)
    # Read and process the corpus files
    archive = zipfile.ZipFile(inPath, 'r')
    usedIds = set()
    tree = None
    for fileName, setName in [("SemEval2010_task8_all_data/SemEval2010_task8_training/TRAIN_FILE.TXT", "train"),\
                              ("SemEval2010_task8_all_data/SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT", "test")]:
        print "Processing file", fileName, "as set", setName
        f = archive.open(fileName)
        tree = processLines(f.readlines(), setName, directed=directed, negatives=negatives, usedIds=usedIds, tree=tree, corpusId=corpusId)
        f.close()
    # Divide the training set into training and development sets
    MakeSets.processCorpus(tree, None, "train", [("train", 0.7), ("devel", 1.0)], 1)
    # Write out the converted corpus
    convertedPath = os.path.join(outDir, corpusId + "-converted.xml")
    ETUtils.write(tree.getroot(), convertedPath)
    # Preprocess the converted corpus
    if preprocess:
        outPath = os.path.join(outDir, corpusId + ".xml")
        preprocessor = Preprocessor(constParser, depParser)
        preprocessor.setArgForAllSteps("debug", debug)
        preprocessor.stepArgs("CONVERT")["corpusName"] = corpusId
        preprocessor.process(convertedPath, outPath, preprocessorParameters, omitSteps=["SPLIT-SENTENCES", "NER", "SPLIT-NAMES"])
    # Stop logging
    if logging:
        Stream.closeLog(os.path.join(outDir, "log.txt"))

Пример #5

Показать файл

Файл: convertSemEval2010Task8.py Проект: Mu-Y/BioNLPST

def convert(inPath,
            outDir,
            corpusId,
            directed,
            negatives,
            preprocess,
            preprocessorParameters=None,
            debug=False,
            clear=False,
            constParser="BLLIP-BIO",
            depParser="STANFORD-CONVERT",
            logging=True):
    assert negatives in ("INCLUDE", "SKIP", "REVERSE_POS")
    # Download the corpus if needed
    if inPath == None:
        if not hasattr(Settings, "SE10T8_CORPUS"):
            SemEval2010Task8Tools.install()
        inPath = Settings.SE10T8_CORPUS
    assert os.path.exists(inPath)
    # Prepare the output directory
    if not os.path.exists(outDir):
        print "Making output directory", outDir
        os.makedirs(outDir)
    elif clear:
        print "Removing output directory", outDir
        shutil.rmtree(outDir)
    # Start logging
    if logging:
        Stream.openLog(os.path.join(outDir, "log.txt"), clear=clear)
    # Read and process the corpus files
    archive = zipfile.ZipFile(inPath, 'r')
    usedIds = set()
    tree = None
    for fileName, setName in [("SemEval2010_task8_all_data/SemEval2010_task8_training/TRAIN_FILE.TXT", "train"),\
                              ("SemEval2010_task8_all_data/SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT", "test")]:
        print "Processing file", fileName, "as set", setName
        f = archive.open(fileName)
        tree = processLines(f.readlines(),
                            setName,
                            directed=directed,
                            negatives=negatives,
                            usedIds=usedIds,
                            tree=tree,
                            corpusId=corpusId)
        f.close()
    # Divide the training set into training and development sets
    MakeSets.processCorpus(tree, None, "train", [("train", 0.7),
                                                 ("devel", 1.0)], 1)
    # Write out the converted corpus
    convertedPath = os.path.join(outDir, corpusId + "-converted.xml")
    ETUtils.write(tree.getroot(), convertedPath)
    # Preprocess the converted corpus
    if preprocess:
        outPath = os.path.join(outDir, corpusId + ".xml")
        preprocessor = Preprocessor(constParser, depParser)
        preprocessor.setArgForAllSteps("debug", debug)
        preprocessor.stepArgs("CONVERT")["corpusName"] = corpusId
        preprocessor.process(
            convertedPath,
            outPath,
            preprocessorParameters,
            omitSteps=["SPLIT-SENTENCES", "NER", "SPLIT-NAMES"])
    # Stop logging
    if logging:
        Stream.closeLog(os.path.join(outDir, "log.txt"))