示例#1
0
def setup(args):
    """
    Build the dataset and the model that every command works with.

    The entity list defaults to the labels reported by the dataset and can be narrowed
    by a json file. The pipeline class is either built from a json configuration or
    looked up by name in medacy.pipelines.

    :param args: Argparse args object.
    :return dataset, model: The dataset and model objects created.
    :raises ValueError: If the json file lists entities that are not labels of the dataset.
    """
    dataset = Dataset(args.dataset)
    # The labels are read before the test-mode limit is put on the dataset
    entities = list(dataset.get_labels())
    if args.test_mode:
        dataset.data_limit = 1

    if args.entities is not None:
        with open(args.entities, 'rb') as f:
            json_entities = json.load(f)['entities']
        # The json file may only select from the labels the dataset actually has
        if not set(json_entities).issubset(entities):
            raise ValueError(
                f"The following entities from the json file are not in the provided dataset: {set(json_entities) - set(entities)}"
            )
        entities = json_entities

    if args.custom_pipeline is None:
        # The argument names a class in module medacy.pipelines
        pipeline_cls = getattr(importlib.import_module("medacy.pipelines"), args.pipeline)
        logging.info('Using %s', args.pipeline)
    else:
        logging.info(
            f"Using custom pipeline configured at {args.custom_pipeline}")
        # json_to_pipeline returns a pipeline class (not an instance) for the given json path
        pipeline_cls = json_to_pipeline(args.custom_pipeline)

    pipeline_kwargs = dict(
        entities=entities,
        cuda_device=args.cuda,
        word_embeddings=args.word_embeddings,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        epochs=args.epochs,
        pretrained_model=args.pretrained_model,
        using_crf=args.using_crf,
    )

    return dataset, Model(pipeline_cls(**pipeline_kwargs))
示例#2
0
def setup(args):
    """
    Build the dataset and the model that every command works with.

    Three model sources are supported, checked in this order: the spacy model, a custom
    pipeline described by a json file, and a pipeline class named in medacy.pipelines.

    :param args: Argparse args object.
    :return dataset, model: The dataset and model objects created.
    """
    dataset = Dataset(args.dataset)
    # Collected up front for every branch, although only the named-pipeline branch uses it
    entities = list(dataset.get_labels())

    if args.pipeline == 'spacy':
        logging.info('Using spacy model')
        return dataset, SpacyModel(spacy_model_name=args.spacy_model, cuda=args.cuda)

    if args.custom_pipeline is not None:
        # json_to_pipeline returns a pipeline class (not an instance) for the given json path;
        # all parameters are part of that class, so it is instantiated without arguments
        pipeline_cls = json_to_pipeline(args.custom_pipeline)
        return dataset, Model(pipeline_cls())

    # Otherwise the argument names a class in module medacy.pipelines
    pipeline_cls = getattr(importlib.import_module("medacy.pipelines"), args.pipeline)
    logging.info('Using %s', args.pipeline)

    pipeline_kwargs = dict(
        entities=entities,
        cuda_device=args.cuda,
        word_embeddings=args.word_embeddings,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        epochs=args.epochs,
        pretrained_model=args.pretrained_model,
        using_crf=args.using_crf,
    )

    return dataset, Model(pipeline_cls(**pipeline_kwargs))
示例#3
0
import argparse
import glob
import os
from collections import defaultdict
from xml.etree import cElementTree
from medacy.data.dataset import Dataset

# Setup: runs at import time. Two positional folder paths are read from the command
# line and each one is loaded as a Dataset.
parser = argparse.ArgumentParser(description='n2c2: Evaluation script for Track 2')
parser.add_argument('folder1', help='First data folder path (gold)')
parser.add_argument('folder2', help='Second data folder path (system)')
args = parser.parse_args()

# folder1 holds the gold annotations, folder2 the system output (per the help texts above)
gold_dataset = Dataset(args.folder1)
prediction_dataset = Dataset(args.folder2)
# Labels of both datasets combined with `&` and frozen into a tuple.
# NOTE(review): presumably get_labels() returns a set, making this the labels common to
# both folders -- confirm against Dataset.get_labels.
global_tags = tuple(gold_dataset.get_labels() & prediction_dataset.get_labels())


class ClinicalCriteria(object):
    """Criteria in the Track 1 documents."""

    def __init__(self, tid, value):
        """Init."""
        self.tid = tid.strip().upper()
        self.ttype = self.tid
        self.value = value.lower().strip()

    def equals(self, other, mode='strict'):
        """Return whether the current criteria is equal to the one provided."""
        if other.tid == self.tid and other.value == self.value:
            return True