def setup(args):
    """
    Build the dataset and the model that every command works with.

    The entity list starts out as the labels reported by the dataset. When
    ``args.entities`` names a json file, the list stored under its
    ``'entities'`` key is used instead, provided every entry is also a label
    of the dataset.

    :param args: Argparse args object.
    :return dataset, model: The dataset and model objects created.
    :raises ValueError: if the json file lists entities that are not labels of the dataset.
    """
    dataset = Dataset(args.dataset)
    entities = list(dataset.get_labels())

    # The labels are collected before the limit is applied.
    if args.test_mode:
        dataset.data_limit = 1

    if args.entities is not None:
        with open(args.entities, 'rb') as entities_file:
            json_entities = json.load(entities_file)['entities']

        # Anything requested in the json file must already be a label of the dataset.
        unknown = set(json_entities) - set(entities)
        if unknown:
            raise ValueError(
                f"The following entities from the json file are not in the provided dataset: {unknown}"
            )
        entities = json_entities

    # Resolve the pipeline *class*; it is instantiated once below.
    if args.custom_pipeline is None:
        # The argument is the name of a class exported by medacy.pipelines
        pipelines_module = importlib.import_module("medacy.pipelines")
        pipeline_cls = getattr(pipelines_module, args.pipeline)
        logging.info('Using %s', args.pipeline)
    else:
        logging.info(
            f"Using custom pipeline configured at {args.custom_pipeline}")
        # args.custom_pipeline is the path of the json file describing the pipeline
        pipeline_cls = json_to_pipeline(args.custom_pipeline)

    pipeline_options = {
        'entities': entities,
        'cuda_device': args.cuda,
        'word_embeddings': args.word_embeddings,
        'batch_size': args.batch_size,
        'learning_rate': args.learning_rate,
        'epochs': args.epochs,
        'pretrained_model': args.pretrained_model,
        'using_crf': args.using_crf,
    }
    pipeline = pipeline_cls(**pipeline_options)

    return dataset, Model(pipeline)
def setup(args):
    """
    Build the dataset and the model that every command works with.

    Three cases are handled, in this order of precedence:

    * ``args.pipeline == 'spacy'``: a ``SpacyModel`` is created directly.
    * ``args.custom_pipeline`` is set: the pipeline class is generated from
      that json path and instantiated without arguments.
    * otherwise: ``args.pipeline`` is looked up as a class name in
      ``medacy.pipelines`` and instantiated with the command-line options.

    :param args: Argparse args object.
    :return dataset, model: The dataset and model objects created.
    """
    dataset = Dataset(args.dataset)
    entities = list(dataset.get_labels())

    if args.pipeline == 'spacy':
        logging.info('Using spacy model')
        spacy_model = SpacyModel(spacy_model_name=args.spacy_model, cuda=args.cuda)
        return dataset, spacy_model

    if args.custom_pipeline is not None:
        # json_to_pipeline returns a class (not an instance); args.custom_pipeline is the json path.
        # Every parameter is baked into that class, so it is instantiated with no arguments.
        custom_pipeline_cls = json_to_pipeline(args.custom_pipeline)
        return dataset, Model(custom_pipeline_cls())

    # Fall back to treating the argument as a class name in module medacy.pipelines
    pipelines_module = importlib.import_module("medacy.pipelines")
    pipeline_cls = getattr(pipelines_module, args.pipeline)
    logging.info('Using %s', args.pipeline)

    pipeline_options = {
        'entities': entities,
        'cuda_device': args.cuda,
        'word_embeddings': args.word_embeddings,
        'batch_size': args.batch_size,
        'learning_rate': args.learning_rate,
        'epochs': args.epochs,
        'pretrained_model': args.pretrained_model,
        'using_crf': args.using_crf,
    }
    return dataset, Model(pipeline_cls(**pipeline_options))
import argparse
import glob
import os
from collections import defaultdict
# NOTE(review): xml.etree.cElementTree was deprecated in Python 3.3 and removed
# in Python 3.9 - confirm which interpreter versions this script must support.
from xml.etree import cElementTree

from medacy.data.dataset import Dataset

# Setup: the command line is parsed at import time, so this module is meant to
# be run as a script with two positional folder arguments.
parser = argparse.ArgumentParser(description='n2c2: Evaluation script for Track 2')
parser.add_argument('folder1', help='First data folder path (gold)')
parser.add_argument('folder2', help='Second data folder path (system)')
args = parser.parse_args()

# One Dataset per folder: the gold annotations and the system predictions.
gold_dataset = Dataset(args.folder1)
prediction_dataset = Dataset(args.folder2)
# Labels common to both datasets (`&` on the two get_labels() results;
# presumably these are sets - TODO confirm against Dataset.get_labels).
global_tags = tuple(gold_dataset.get_labels() & prediction_dataset.get_labels())


# NOTE(review): the parser description above says Track 2 while this docstring
# says Track 1 - confirm which is intended.
class ClinicalCriteria(object):
    """Criteria in the Track 1 documents."""

    def __init__(self, tid, value):
        """
        Store a normalized criterion.

        :param tid: Criterion identifier; stored stripped and upper-cased.
        :param value: Criterion value; stored lower-cased and stripped.
        """
        self.tid = tid.strip().upper()
        # The type of a criterion is its (normalized) identifier.
        self.ttype = self.tid
        self.value = value.lower().strip()

    def equals(self, other, mode='strict'):
        """
        Return whether the current criteria is equal to the one provided.

        Two criteria match when both their ``tid`` and ``value`` are equal.

        :param other: The criterion to compare against; must expose ``tid`` and ``value``.
        :param mode: Not used by the code visible here.
        :return: True when the criteria match. NOTE(review): no explicit return for
            the non-matching case is visible in this portion of the file - confirm.
        """
        if other.tid == self.tid and other.value == self.value:
            return True