Пример #1
0
    def __init__(
            self,
            pre_negation_uncertainty_path,
            negation_path,
            post_negation_uncertainty_path,
            verbose=False):
        """Set up the parsing components and the pattern-based detector.

        Args:
            pre_negation_uncertainty_path: Forwarded as the first positional
                argument of ``ModifiedDetector``.
            negation_path: Forwarded as the second positional argument of
                ``ModifiedDetector``.
            post_negation_uncertainty_path: Forwarded as the third positional
                argument of ``ModifiedDetector``.
            verbose: Stored unchanged on the instance as ``self.verbose``.
        """
        self.verbose = verbose

        # Parsing stack: the Bllip parser always uses the module-level
        # PARSING_MODEL_DIR; the converter is created with universal=True.
        self.parser = parse.Bllip(model_dir=PARSING_MODEL_DIR)
        self.ptb2dep = ptb2ud.Ptb2DepConverter(universal=True)
        self.lemmatizer = ptb2ud.Lemmatizer()

        # The three pattern paths are handed over positionally, in the same
        # order in which they were received.
        pattern_paths = (
            pre_negation_uncertainty_path,
            negation_path,
            post_negation_uncertainty_path,
        )
        self.detector = ModifiedDetector(*pattern_paths)
Пример #2
0
def main(argv):
    """Parse the command line and run ``pipeline`` via ``scan.scan_document``.

    The options are parsed by docopt against the *module* docstring
    (``__doc__`` here resolves to the module-level name, not to this
    function's docstring) and echoed to stdout.

    Args:
        argv: Argument list handed to ``docopt.docopt``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    # A list display is evaluated left to right, so the components are still
    # constructed in the order splitter, parser, converter, lemmatizer,
    # detector.
    components = [
        ssplit.NltkSSplitter(newline=True),
        parse.Bllip(model_dir=options['--model']),
        ptb2ud.Ptb2DepConverter(universal=True),
        ptb2ud.Lemmatizer(),
        negdetect.Detector(options['--neg-patterns'],
                           options['--uncertainty-patterns']),
    ]

    scan.scan_document(
        source=options['SOURCE'],
        directory=options['--out'],
        suffix='.neg.xml',
        fn=pipeline,
        non_sequences=components,
    )
Пример #3
0
def main(argv):
    """Parse the command line, run ``pipeline`` on SOURCE and write the result.

    The options are parsed by docopt against the *module* docstring
    (``__doc__`` here resolves to the module-level name, not to this
    function's docstring) and echoed to stdout. The collection built from
    ``SOURCE`` is passed to ``pipeline`` and then dumped with ``bioc.dump``
    to the ``--out`` path (``~`` is expanded).

    Args:
        argv: Argument list handed to ``docopt.docopt``.
    """
    argv = docopt.docopt(__doc__, argv=argv)
    print(argv)

    splitter = ssplit.NltkSSplitter(newline=argv['--newline_is_sentence_break'])
    parser = parse.Bllip(model_dir=argv['--bllip-model'])
    ptb2dep = ptb2ud.Ptb2DepConverter(universal=True)
    lemmatizer = ptb2ud.Lemmatizer()
    mm = pymetamap.MetaMap.get_instance(argv['--metamap'])
    neg_detector = negdetect.Detector(argv['--neg-patterns'], argv['--uncertainty-patterns'])

    # The literal string 'None' (not the None object) is the value that
    # disables CUI filtering; any other value is passed to read_cuis.
    if argv['--cuis'] == 'None':
        cuis = None
    else:
        cuis = dner_mm.read_cuis(argv['--cuis'])

    collection = text2bioc.text2collection(argv['SOURCE'], split_document=argv['--split-document'])
    pipeline(collection, mm, splitter, parser, ptb2dep, lemmatizer, neg_detector, cuis)

    # Pin the output encoding: without it, text-mode open() falls back to the
    # platform locale, so non-ASCII document text could raise
    # UnicodeEncodeError or be written in a non-UTF-8 encoding.
    # NOTE(review): assumes the BioC output is meant to be UTF-8 - confirm.
    with open(os.path.expanduser(argv['--out']), 'w', encoding='utf-8') as fp:
        bioc.dump(collection, fp)