Example No. 1
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s''' % (
            options.id, options.report_text, options.table)

        self.conn = sqlite.connect(options.dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.instantiateFromCSVtoitemData(
            options.lexical_kb,
            literalColumn=0,
            categoryColumn=1,
            regexColumn=2,
            ruleColumn=3)
        self.targets = itemData.instantiateFromCSVtoitemData(options.domain_kb,
                                                             literalColumn=0,
                                                             categoryColumn=1,
                                                             regexColumn=2,
                                                             ruleColumn=3)
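For reference, a minimal standalone sketch of the four-column knowledge-base format that the keyword arguments above describe. The file name, the rows, and the assumption of a single header row are illustrative, not taken from the project above:

import pyConTextNLP.itemData as itemData

# Hypothetical four-column, tab-separated KB: literal, category, regex, rule.
# One header row is assumed, matching the loader's apparent default.
rows = ["Lex\tType\tRegex\tDirection",
        "no evidence of\tdefinite_negated_existence\t\tforward",
        "history of\thistorical\t\tforward"]
with open("demo_modifiers.tsv", "w") as f:
    f.write("\n".join(rows) + "\n")

modifiers = itemData.instantiateFromCSVtoitemData(
    "demo_modifiers.tsv",
    literalColumn=0, categoryColumn=1, regexColumn=2, ruleColumn=3)
print(len(modifiers))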
Example No. 2
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """


        # Define queries to select data from the SQLite database
        # this gets the reports we will process
        self.query1 = '''SELECT %s,%s FROM %s''' % (options.id, options.report_text, options.table)

        self.conn = sqlite.connect(options.dbname)
        self.cursor = self.conn.cursor()
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.instantiateFromCSVtoitemData(options.lexical_kb,
                literalColumn=0,
                categoryColumn=1,
                regexColumn=2,
                ruleColumn=3)
        self.targets = itemData.instantiateFromCSVtoitemData(options.domain_kb,
                literalColumn=0,
                categoryColumn=1,
                regexColumn=2,
                ruleColumn=3)
Example No. 3
    def PerformAnnotation(
            cls,
            pyConTextInputObject,
            targetFilePath=defaultTargetFilePath,
            modifiersFilePath=defaultModifiersFilePath,
            modifierToClassMap=defaultModifierToAnnotationClassMap,
            annotationGroup="MIMC_v2"):
        """
        This method runs PyConText on the input Sentence objects and returns a Document object, or a list of Document
        objects if Sentences from multiple notes are passed as input.

        :param pyConTextInputObject: [object] An instance of PyConTextInput produced by one of the sentence splitters, containing the sentences to be annotated.
        :param targetFilePath: [string] The path to the tsv file containing the PyConText target terms.
        :param modifiersFilePath: [string] The path to the tsv file containing the PyConText modifier terms.
        :param modifierToClassMap: [dict] A dictionary used to map eHost classes to pyConText modifier types.
        :param annotationGroup: [string] The current annotation round.
        :return: [object | list of objects] A single Document instance if all the sentences share a common documentName or a list of Document
        objects if the input sentences are from multiple notes.
        """

        targets = itemData.instantiateFromCSVtoitemData(targetFilePath)
        modifiers = itemData.instantiateFromCSVtoitemData(modifiersFilePath)

        return _performAnnotationInternal(pyConTextInputObject, targets,
                                          modifiers, modifierToClassMap,
                                          annotationGroup)
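A hedged call-site sketch for the method above. The file paths are illustrative, the splitter call mirrors Examples 17 and 24, and the PyConTextInterface import path is an assumption about eHostess's layout rather than something shown in these examples:

import pyConTextNLP.itemData as itemData
from eHostess.PyConTextInterface.PyConText import PyConTextInterface  # assumed import path
from eHostess.PyConTextInterface.SentenceSplitters import TargetSpanSplitter

# Hypothetical paths to a note directory and the target/modifier TSVs.
targets = itemData.instantiateFromCSVtoitemData('./targets.tsv')
pyConTextInput = TargetSpanSplitter.splitSentencesMultipleDocuments(
    ['./notes/corpus/'], targets, 10, 6)

docs = PyConTextInterface.PerformAnnotation(pyConTextInput,
                                            targetFilePath='./targets.tsv',
                                            modifiersFilePath='./modifiers.tsv')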
Example No. 4
    def setUp(self):
        self.targets = os.path.abspath('../lexicon/targets.tsv')
        self.modifiers = os.path.abspath('../lexicon/modifiers.tsv')

        try:
            itemData.instantiateFromCSVtoitemData(self.modifiers)
        except IndexError as e:
            print("Modifiers failed to load")
            raise e
        try:
            itemData.instantiateFromCSVtoitemData(self.targets)
        except IndexError as e:
            print("Targets failed to load")
            raise e

        #targets = 'https://raw.githubusercontent.com/abchapman93/hai_detect/master/lexicon/targets.tsv'
        #modifiers = 'https://raw.githubusercontent.com/abchapman93/hai_detect/master/lexicon/modifiers.tsv'
        self.model = MentionLevelModel(self.targets, self.modifiers)

        self.doc1 = ClinicalTextDocument.ClinicalTextDocument(
            'There is an abscess near the abdomen.')
        self.doc1.annotate(self.model)
        self.annotation_pos_ssi1 = self.doc1.annotations[0]

        self.doc2 = ClinicalTextDocument.ClinicalTextDocument(
            'There is no erythema to be seen along the surgical site.')
        self.doc2.annotate(self.model)
        self.annotation_neg_ssi2 = self.doc2.annotations[0]

        self.doc3 = ClinicalTextDocument.ClinicalTextDocument(
            'We discussed the risks of surgery, including abscess and '
            'erythema.')
        self.doc3.annotate(self.model)
        self.annotation_hyp_ssi3 = self.doc3.annotations[0]

        self.doc4 = ClinicalTextDocument.ClinicalTextDocument(
            'There is abscess.')
        self.doc4.annotate(self.model)

        self.doc5 = ClinicalTextDocument.ClinicalTextDocument(
            'The patient has a history of wound infection.')
        self.doc5.annotate(self.model)
        self.annotation_hist_ssi5 = self.doc5.annotations[0]

        self.doc6 = ClinicalTextDocument.ClinicalTextDocument(
            'There were complications due to pneumonia that was likely '
            'present at the time of surgery.')
        self.doc6.annotate(self.model)
        self.annotation_hist_ssi6 = self.doc6.annotations[0]
Example No. 5
 def setUp(self):
     self.txt = 'There is fluid collection in the abdomen. There is no hematoma near the liver. Evaluate for abscess.'
     self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
     self.sentences = [x.text for x in self.sentenceSpanPairs]
     self.spans = [x.span for x in self.sentenceSpanPairs]
     #self.sentences = self.sentences.remove('')
     self.modifiers = itemData.instantiateFromCSVtoitemData(
         "/Users/alec/Box Sync/Bucher_Surgical_MIMICIII/pyConText_implement/fcFinder/modifiers.tsv"
     )
     self.targets = itemData.instantiateFromCSVtoitemData(
         "file:///Users/alec/Box Sync/Bucher_Surgical_MIMICIII/pyConText_implement/fcFinder/targets.tsv"
     )
     self.markups = [fc.markup_sentence(x) for x in self.sentences]
     self.first_markup = self.markups[0]
     self.document = fc.create_context_doc(self.markups)
Example No. 6
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Load the itemData from the CSV columns specified in the options

        self.items = itemData.instantiateFromCSVtoitemData(options.items,
                literalColumn=options.lc,
                categoryColumn=options.cc,
                regexColumn=options.rec,
                ruleColumn=options.ruc)
Example No. 7
    def __init__(self, options):
        """create an instance of a criticalFinder object associated with the SQLite
        database.
        dbname: name of SQLite database
        """

        # Load the itemData from the CSV columns specified in the options

        self.items = itemData.instantiateFromCSVtoitemData(
            options.items,
            literalColumn=options.lc,
            categoryColumn=options.cc,
            regexColumn=options.rec,
            ruleColumn=options.ruc)
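Examples 6 and 7 are the same initializer formatted two ways. A hedged sketch of the options object they expect; the flag names come from the attributes used above, while the defaults and the file path are illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--items", default="./kb/items.tsv")  # hypothetical KB path
parser.add_argument("--lc", type=int, default=0)   # literalColumn
parser.add_argument("--cc", type=int, default=1)   # categoryColumn
parser.add_argument("--rec", type=int, default=2)  # regexColumn
parser.add_argument("--ruc", type=int, default=3)  # ruleColumn
options = parser.parse_args([])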
Example No. 8
 def test_modifiers_load(self):
     try:
         itemData.instantiateFromCSVtoitemData(self.modifiers)
     except IndexError as e:
         self.fail(msg="Modifiers failed to load")
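Example No. 9 below seeds empty itemData containers and extends them from several knowledge-base files, one set for modifiers and one for targets. That pattern in isolation, as a standalone sketch with illustrative file paths:

import pyConTextNLP.itemData as itemData

lexical_kb = ['./kb/lexical_kb.tsv', './kb/extra_modifiers.tsv']  # hypothetical paths
domain_kb = ['./kb/utah_crit.tsv']

modifiers = itemData.itemData()
targets = itemData.itemData()
for kb in lexical_kb:
    modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
for kb in domain_kb:
    targets.extend(itemData.instantiateFromCSVtoitemData(kb))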
Example No. 9
        self.mode = mode
        self.dbname = dbname
        self.getDBConnection(self.dbname)

        # get reports to process
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        # Create the pyConTextNLP ConTextDocument. This is the container for all the markups
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.itemData()
        self.targets = itemData.itemData()
        for kb in lexical_kb:
            self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
        for kb in domain_kb:
            self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))


        self.debug = debug
        if self.debug:
            print "debug set to True"
            tmp = os.path.splitext(self.dbname)
            self.debugDir = tmp[0] + "_debug_dir"
            if not os.path.exists(self.debugDir):
                os.mkdir(self.debugDir)
        else:
            self.debugDir = ''

    def readRules(self, fname):
        """read the sentence level rules"""
Example No. 10
        print passedColor + "Passed\n" + resetColor

#### Test PyConTextInterface.SentenceSplitters.TargetSpanSplitter ####
    printTestName(
        'Testing PyConTextInterface.SentenceSplitters.TargetSpanSplitter')
    from eHostess.PyConTextInterface.SentenceSplitters.TargetSpanSplitter import splitSentencesSingleDocument
    from eHostess.PyConTextInterface.SentenceSplitters.TargetSpanSplitter import splitSentencesMultipleDocuments
    import pyConTextNLP.itemData as itemData
    from eHostess.PyConTextInterface.SentenceSplitters.PyConTextInput import DocumentPlaceholder
    failed = False
    testDocPath = "./UnitTestDependencies/PyConText/SentenceSplitters/TargetSpanSplitter/Docs/TestDocToSplit.txt"
    testTargetsPath = os.path.join(
        os.getcwd(),
        "./UnitTestDependencies/PyConText/SentenceSplitters/TargetSpanSplitter/testTargets.tsv"
    )
    targets = itemData.instantiateFromCSVtoitemData(testTargetsPath)
    pyConTextInput = splitSentencesSingleDocument(testDocPath, targets, 4, 4)

    if len(pyConTextInput.keys()) != 1 or len(
            pyConTextInput['TestDocToSplit']) != 3:
        failed = True
    if pyConTextInput['TestDocToSplit'][0].text != 'twelve thir^$#)(teen hemorrhage fourteen, brbpr [fifteen]' \
            or pyConTextInput['TestDocToSplit'][1].text != 'three, four% five s^$#)ix bleed seven, eight [nine]'\
            or pyConTextInput['TestDocToSplit'][2].text != 'ten,\neleven% twelve thir^$#)(teen hemorrhage fourteen, brbpr [fifteen]':
        print "Text was parsed incorrectly."
        failed = True
    if pyConTextInput['TestDocToSplit'][0].documentSpan != (74, 131) or pyConTextInput['TestDocToSplit'][1].documentSpan != (8, 59) \
            or pyConTextInput['TestDocToSplit'][2].documentSpan != (61, 131):
        print "Span parsed incorrectly."
        failed = True
    if pyConTextInput['TestDocToSplit'][0].documentName != 'TestDocToSplit' or pyConTextInput['TestDocToSplit'][1].documentName != 'TestDocToSplit'\
            or pyConTextInput['TestDocToSplit'][2].documentName != 'TestDocToSplit':
        failed = True
Example No. 11
import eHostess.Analysis.Metrics as Metrics
from eHostess.Annotations.Document import ClassifiedDocument
import numpy as np

adjudicatedDirectories = ['a list of paths, each to a directory containing annotated notes. '
                          'Typically, when using an annotation tool like eHost, notes are divided into several batches; '
                          'this is a list of paths to those batches.']
corpusDirectories = ['for our project the raw training documents were split into batches; this is a list of directories containing those batches']
PATH_TO_TARGETS = "./targets.tsv"
PATH_TO_MODIFIERS = "./modifiers.tsv"

print "Parsing adjudicated docs..."
adjudicatorDocs = KnowtatorReader.parseMultipleKnowtatorFiles(adjudicatedDirectories)
if len(adjudicatorDocs) != 30 * len(adjudicatedDirectories):
    raise RuntimeError("There should be %i annotated eHost documents. But there are %i." % (30 * len(adjudicatedDirectories), len(adjudicatorDocs)))

targets = itemData.instantiateFromCSVtoitemData(PATH_TO_TARGETS)
pyConTextInput = TargetSpanSplitter.splitSentencesMultipleDocuments(corpusDirectories, targets, 8, 8)

pyConTextDocs = PyConTextInterface.PerformAnnotation(pyConTextInput)
if len(pyConTextDocs) != 30 * len(corpusDirectories):
    raise RuntimeError("There should be %i annotated pyConText documents. But there are %i." % (30 * len(corpusDirectories), len(pyConTextDocs)))

classifiedAdjudicatorDocs = []
for doc in adjudicatorDocs:
    annotations = doc.annotations
    annotationsToKeep = []
    currentDocumentClass = ''
    for annotation in annotations:
        if annotation.annotationClass == 'doc_classification':
            currentDocumentClass = annotation.attributes["present_or_absent"]
        else:
Example No. 12
"""
@author: Alec Chapman
Last Updated: 5-10-17
"""

import pyConTextNLP.pyConTextGraph as pyConText
import pyConTextNLP.itemData as itemData

try:
    modifiers = itemData.instantiateFromCSVtoitemData(
        'https://raw.githubusercontent.com/abchapman93/fcFinder/master/modifiers.tsv'
    )
    targets = itemData.instantiateFromCSVtoitemData(
        'https://raw.githubusercontent.com/abchapman93/fcFinder/master/targets.tsv'
    )
except Exception:
    modifiers = None
    targets = None


class markup_conditions(object):
    """This class creates the conditions of interest for a markup.
    A rule-based classifier can then assign a class to a markup based on rules
    pertaining to these conditions.
    """
    def __init__(self,
                 markup=None,
                 target_values=[['fluid_collection']],
                 target=None,
                 modifiers=[],
                 definitive=False,
Example No. 13
    predicted = (predicted - 1) * -1
    return precision_score(true, predicted)


def printScores(truth, predictions):
    accuracy = accuracy_score(truth, predictions)
    precision, recall, fscore, support = precision_recall_fscore_support(
        truth, predictions, average="binary")
    npv = NPV(truth, predictions)
    specificity = specificity_score(truth, predictions)

    print "Accuracy: %.3f\nF-Score: %.3f\nPrecision: %.3f\nRecall (Sensitivity): %.3f\nSpecificity: %.3f\nNPV: %.3f" \
        % (accuracy, fscore, precision, recall, specificity, npv)


targets = itemData.instantiateFromCSVtoitemData(TARGETS_PATH)
sentences = SpanSplitter.splitSentencesMultipleDocuments(
    NOTES_DIR, targets, 10, 10)
splitter = "Span"
# notesList = glob.glob(NOTES_DIR + "*")
# sentences = SpacySplitter.splitSentencesMultipleDocuments(notesList)
# splitter = "Spacy"
pyConText = pyConText.PyConTextInterface()
docs = pyConText.PerformAnnotation(sentences)

names, classes = getNotesAndClasses(GOLD_STANDARD_FILEPATH)

truth, predictions = produceClassifications(docs, names, classes)

print "Scores for splitter: %s" % (splitter)
printScores(truth, predictions)
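printScores above calls NPV and specificity_score; only the tail of one survives at the top of this example (predicted = (predicted - 1) * -1, i.e. 1 - predicted, then precision on the flipped labels). A minimal sketch of the presumed helpers, assuming 0/1 labels:

from sklearn.metrics import precision_score, recall_score

def NPV(true, predicted):
    # Flipping 0/1 labels makes the negative class the "positive" one,
    # so precision on the flipped labels is the negative predictive value.
    true = [1 - t for t in true]
    predicted = [1 - p for p in predicted]
    return precision_score(true, predicted)

def specificity_score(true, predicted):
    # Recall on the flipped labels is specificity (the true negative rate).
    true = [1 - t for t in true]
    predicted = [1 - p for p in predicted]
    return recall_score(true, predicted)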
Example No. 14
import pyConTextNLP.pyConTextGraph as pyConText
import pyConTextNLP.itemData as itemData
import networkx as nx

modifiers = itemData.instantiateFromCSVtoitemData(
    "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.tsv"
)


def markup_sentence(sentence, modifiers, targets, prune_inactive=True):
    markup = pyConText.ConTextMarkup()
    markup.setRawText(sentence.lower())
    markup.cleanText()
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    # apply modifiers to any targets within the modifiers scope
    markup.applyModifiers()
    markup.pruneSelfModifyingRelationships()
    if prune_inactive:
        markup.dropInactiveModifiers()
    return markup


def clean_up_report(report, start_phrase):
    report = report.lower()
    # remove text prior to start_phrase
    if start_phrase:
        if report.find(start_phrase.lower()) != -1:
            report = report[report.index(start_phrase.lower()) +
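clean_up_report is cut off above. Separately, a hedged call-site sketch for markup_sentence; the targets file is an assumed local path, since this example only loads modifiers:

targets = itemData.instantiateFromCSVtoitemData("./targets.tsv")  # hypothetical path

doc = pyConText.ConTextDocument()
for sentence in ["there is no evidence of hemorrhage.",
                 "impression: probable pneumonia."]:
    doc.addMarkup(markup_sentence(sentence, modifiers, targets))
print(doc.getDocumentGraph())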
Example No. 15
 def test_targets_load(self):
     try:
         itemData.instantiateFromCSVtoitemData(self.targets)
     except IndexError as e:
         self.fail(msg="Targets failed to load.")
Example No. 16
class pyConTextNLP_REST(object):

    mod = itemData.instantiateFromCSVtoitemData(
        "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/lexical_kb_05042016.tsv"
    )
    tar = itemData.instantiateFromCSVtoitemData(
        "https://raw.githubusercontent.com/chapmanbe/pyConTextNLP/master/KB/utah_crit.tsv"
    )

    clrs = {
        "bowel_obstruction": "blue",
        "inflammation": "blue",
        "definite_negated_existence": "red",
        "probable_negated_existence": "indianred",
        "ambivalent_existence": "orange",
        "probable_existence": "forestgreen",
        "definite_existence": "green",
        "historical": "goldenrod",
        "indication": "pink",
        "acute": "golden"
    }

    @cherrypy.expose
    def index(self):
        return "Welcome to pyConTextNLP REST API. To start go to /markup_report."

    @cherrypy.expose
    def markup_report(
            self,
            report='''IMPRESSION: Evaluation limited by lack of IV contrast; however, no evidence of
                                    bowel obstruction or mass identified within the abdomen or pelvis. 
                                    Non-specific interstitial opacities and bronchiectasis seen at the right
                                    base, suggestive of post-inflammatory changes.
                                    ''',
            modifiers=None,
            targets=None):
        if modifiers is not None:
            print("type of modifiers", type(modifiers))
            print("len of modifiers", len(modifiers))
            print(modifiers)
            for m in modifiers:
                print(m)

        if modifiers is None:
            _modifiers = self.mod
        else:
            _modifiers = itemData.itemData()
            _modifiers.extend(json.loads(modifiers))
        if targets is None:
            _targets = self.tar
        else:
            _targets = itemData.itemData()
            _targets.extend(json.loads(targets))

        context = self.split_sentences(report, _modifiers, _targets)
        clrs = self.get_colors_dict(_modifiers, _targets)
        return html.mark_document_with_html(context, colors=clrs)

    def markup_sentence(self, s, modifiers, targets, prune_inactive=True):
        """
        """

        markup = pyConText.ConTextMarkup()
        markup.setRawText(s)
        markup.cleanText()
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")
        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        # apply modifiers to any targets within the modifiers scope
        markup.applyModifiers()
        markup.pruneSelfModifyingRelationships()
        if prune_inactive:
            markup.dropInactiveModifiers()
        return markup

    def split_sentences(self, report, modifiers, targets):
        blob = TextBlob(report.lower())
        count = 0
        rslts = []
        for s in blob.sentences:
            m = self.markup_sentence(s.raw, modifiers, targets)
            rslts.append(m)

        context = pyConText.ConTextDocument()
        for r in rslts:
            context.addMarkup(r)

        return context

    def get_colors_dict(self, modifiers, targets):
        # this method assigns blue to all targets
        # and a different color to each modifier category
        #import colorsys
        import randomcolor
        colors = {}
        rcol = randomcolor.RandomColor()
        for t in targets:
            colors[t.getCategory()[0]] = 'blue'
        mm = set([c.getCategory()[0] for c in modifiers])
        for m in mm:
            colors[m] = rcol.generate()[0]

        return colors
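A minimal way to serve the REST class above, assuming CherryPy's standard entry point (the class also relies on cherrypy, json, TextBlob, pyConText, and the html helper being imported in the trimmed header of this example):

if __name__ == '__main__':
    import cherrypy
    # Mount the class at the site root; CherryPy listens on port 8080 by default.
    cherrypy.quickstart(pyConTextNLP_REST())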
Example No. 17
adjudicatedDirectories = [
    '/users/shah/Box Sync/MIMC_v2/Annotation/Adjudication/batch_0/corpus/',
    '/users/shah/Box Sync/MIMC_v2/Annotation/Adjudication/batch_1/corpus/',
    '/users/shah/Box Sync/MIMC_v2/Annotation/Adjudication/batch_2/corpus/',
    '/users/shah/Box Sync/MIMC_v2/Annotation/Adjudication/batch_3/corpus/'
]

print "Parsing adjudicated docs..."
adjudicatorDocs = KnowtatorReader.parseMultipleKnowtatorFiles(
    adjudicatedDirectories)
if len(adjudicatorDocs) != 30 * len(adjudicatedDirectories):
    raise RuntimeError(
        "There should be %i annotated eHost documents. But there are %i." %
        (30 * len(adjudicatedDirectories), len(adjudicatorDocs)))

targets = itemData.instantiateFromCSVtoitemData(
    '/Users/shah/Developer/PythonVirtualEnv/lib/python2.7/site-packages/eHostess/PyConTextInterface/TargetsAndModifiers/targets.tsv'
)
pyConTextInput = TargetSpanSplitter.splitSentencesMultipleDocuments(
    corpusDirectories, targets, 8, 8)
#pyConTextInput = BuiltinSplitter.splitSentencesMultipleDocuments(corpusDirectories)

pyConTextDocs = PyConTextInterface.PerformAnnotation(pyConTextInput)
if len(pyConTextDocs) != 30 * len(corpusDirectories):
    raise RuntimeError(
        "There should be %i annotated pyConText documents. But there are %i." %
        (30 * len(corpusDirectories), len(pyConTextDocs)))

classifiedAdjudicatorDocs = []
for doc in adjudicatorDocs:
    annotations = doc.annotations
    annotationsToKeep = []
Example No. 18
def resolveSentencePyConTextNLPExtended(sentence):
    def getNegationValue(g, te):
        hist = getHistoricityValue(g, te)
        if (g.isModifiedByCategory(te, "DEFINITE_NEGATED_EXISTENCE")
                or g.isModifiedByCategory(
                    te, "PROBABLE_NEGATED_EXISTENCE")) and hist:
            return False
        return True

    def getHistoricityValue(g, te):
        if g.isModifiedByCategory(te, "HISTORICAL"):
            return False
        return True

    def getExperiencerValue(g, te):
        if g.isModifiedByCategory(te, "EXPERIENCER"):
            return False
        return True

    #Read in modifiers from lexicon file#
    modlexicon = "/home/gkotsis/projects/pycontext/lexical_kb_04292013.tsv"
    #modlexicon = "/home/gkotsis/projects/pycontext/negex_orig_triggers_in_pycontext_format.csv"
    # modlexicon = "/home/gkotsis/projects/pycontext/kcl_negation_cues.csv"
    tlexicon = "/home/gkotsis/projects/pycontext/targets_suicidality.csv"

    #create itemData instances from the lexicon#
    modifiers = itemData.instantiateFromCSVtoitemData(modlexicon, 'utf-8', 1,
                                                      0, 1, 2, 3)

    #Define the targets#
    #targets = itemData.itemData()
    #tmp = ['suicide','SUICIDE',r'''suicid*''','']
    #item = itemData.contextItem(tmp)
    #targets.append(item)
    targets = itemData.instantiateFromCSVtoitemData(tlexicon, 'utf-8', 1, 0, 1,
                                                    2, 3)

    #apply pyConTextNLP on the sentences along with the specified targets and modifiers#

    def analyzeSentence(sentence, targets=targets, modifiers=modifiers):
        sentence = sentenceNLP.preprocess(sentence, "suicide")
        print sentence
        counter = 0
        counter += 1
        # print "sentence no: "+str(counter)+" - "+sentence
        context = pyConText.ConTextDocument()
        markup = pyConText.ConTextMarkup()
        markup.setRawText(sentence)
        markup.markItems(modifiers, mode="modifier")
        markup.markItems(targets, mode="target")

        markup.pruneMarks()
        markup.dropMarks('Exclusion')
        markup.applyModifiers()

        markup.dropInactiveModifiers()

        context.addMarkup(markup)
        g = context.getDocumentGraph()

        ma = g.getMarkedTargets()
        print g
        # if len(ma)==0:
        # 	print sentence
        for te in ma:
            print te
            return getNegationValue(g, te)

        return None

    return analyzeSentence(sentence, targets=targets, modifiers=modifiers)
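A hedged invocation sketch for the function above; it assumes the hard-coded lexicon paths exist and that the helper module sentenceNLP is importable:

# Returns None if no target is found in the sentence, False when the target is
# negated in a non-historical context, and True otherwise.
result = resolveSentencePyConTextNLPExtended("He denies any suicidal ideation.")
print(result)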
Example No. 19
def main():
    modifiers = itemData.instantiateFromCSVtoitemData(MODIFIERS_FILE)
    targets = itemData.instantiateFromCSVtoitemData(TARGETS_FILE)

    df = pd.read_pickle(SOURCE_DF)
    df = df[df.train_val == 'val']
    print(df.head())
    print(len(df))
    #df = df.iloc[:10]
    ref = pd.read_excel(REFERENCE_STANDARD)
    ref = update_reference_df(ref)
    reports = list(zip(df['note_name'], df['text']))
    pool = Pool(processes=8)
    list_of_classified_markups = [
        pool.apply(extract_markups_from_text,
                   args=(name_and_text, targets, modifiers))
        for name_and_text in reports
    ]
    pool.close()
    pool.join()
    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text']).append(
            list_of_classified_markups)
    print(classified_markups.head())
    exit()
    ##PICK up here

    classified_markups = [{
        'm': m,
        'doc_span': m.docSpan,
        'markup_class': m.markup_class,
        'text': m.text
    } for m in list_of_markups]

    # TODO: Make this one long dataframe, like classified_markups
    df['markups'] = df.apply(
        lambda row: extract_markups_from_text(row.text, targets, modifiers),
        axis=1)
    print(df.head())
    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text'])
    for idx, row in df.iterrows():
        # Get all annotations from reference standard with this report name
        #annotations = ref[ref['File Name with extension'] == row.note_name]
        row_markups = classify_markups(row.markups, row.note_name)
        print(classified_markups)
        #if classified_markups
        classified_markups = classified_markups.append(row_markups,
                                                       ignore_index=True)
    print(len(classified_markups))
    print(classified_markups.head())
    evaluate_markups(ref, classified_markups)

    exit()
    reports = list(df[df.train_val == 'train']['text'])
    reports = [helpers.preprocess(report) for report in reports]
    split_reports = [
        helpers.my_sentence_splitter(report) for report in reports
    ]
    markups = []
    for report in split_reports[:10]:
        # Each report is a list of sentence span pairs
        for text, span in report:
            m = create_markup(s=text,
                              modifiers=modifiers,
                              targets=targets,
                              span=span)
            markups.append(m)
    print(markups)
    exit()

    markups = [
        create_markup(s=sentence,
                      modifiers=modifiers,
                      targets=targets,
                      span=span) for (sentence, span) in sentence_span_pairs
    ]

    report_names = list(set(df.note_name))
    for report in report_names:
        report_df = df[df.note_name == report]
        evaluate_report(report_df)
Example No. 20
 def instantiate_targets(self):
     targets = itemData.instantiateFromCSVtoitemData(self.targets_file)
     return targets
Example No. 21
 def instantiate_modifiers(self):
     modifiers = itemData.instantiateFromCSVtoitemData(self.modifiers_file)
     return modifiers
Example No. 22
def run(
    report_file,
    TARGETS,
    include_targets=None,
    exclude_targets=None,
):
    """Run the parsing of inputs to set the objects required for tbiExtractor.

    Args:
        report_file (pathlib.PosixPath): Path to the .txt file
            containing the radiology report.

        TARGETS (list): Default list of lexical targets.

        Note: only one of include_targets or exclude_targets may be set, to
                limit the search. Defaults to the standard target list.

        include_targets (list): A subset of the available lexical targets options to
            include. Default: None, resulting in standard target list output.

        exclude_targets (list): A subset of the available lexical targets options to
            exclude. Default: None, resulting in standard target list output.

    Returns:
        targets (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the targets extracted
            from the targets_file input.

        modifiers (pyConTextNLP.itemData.itemData): itemData stores a literal,
            category, regular expression, and rule of the modifiers extracted
            from the modifiers_file input.

        doc (spacy.tokens.doc.Doc): spaCy Document containing the radiology
            report.

    """
    # Load lexical targets and lexical modifiers as itemData
    targets = itemData.instantiateFromCSVtoitemData(f"file:{targets_file}")
    modifiers = itemData.instantiateFromCSVtoitemData(f"file:{modifiers_file}")
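    # Note: targets_file and modifiers_file are not parameters of run(); in the
    # original module they are presumably defined elsewhere as module-level paths.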

    # From include and exclude lists, determine algorithm targets
    specified_targets = alter_default_input(TARGETS,
                                            include=include_targets,
                                            exclude=exclude_targets)

    # Remove lexical targets from investigation set
    targets = [x for x in targets if x.categoryString() in specified_targets]

    # Load spacy model
    nlp = download_spacy_model()

    # Load the radiology report from file
    if report_file.is_file():

        with open(report_file, "r") as report_obj:
            report = report_obj.read().replace("\n", "")

    else:
        log.error("Unable to establish pathway to report file.")
        os.sys.exit(1)

    # Convert report to spacy container
    doc = nlp(report)

    return list(specified_targets), targets, modifiers, doc
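A hedged call-site sketch for run(); the report path and target names are illustrative, and the real default TARGETS list ships with tbiExtractor itself:

from pathlib import Path

TARGETS = ["hemorrhage", "fracture", "edema"]  # illustrative target categories
specified_targets, targets, modifiers, doc = run(
    Path("./report.txt"), TARGETS, include_targets=["hemorrhage"])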
Example No. 23
    """IMPRESSION: Evidence of early pulmonary vascular congestion and interstitial edema. Probable scarring at the medial aspect of the right lung base, with no
     definite consolidation.""", """IMPRESSION:

     1.  2.0 cm cyst of the right renal lower pole.  Otherwise, normal appearance
     of the right kidney with patent vasculature and no sonographic evidence of
     renal artery stenosis.
     2.  Surgically absent left kidney.""",
    """IMPRESSION:  No pneumothorax.""",
    """IMPRESSION: No definite pneumothorax""",
    """IMPRESSION:  New opacity at the left lower lobe consistent with pneumonia."""
]
# relative path correction
script_dir = path.dirname(__file__)
print(script_dir)

modifiers = itemData.instantiateFromCSVtoitemData("file:///" + script_dir +
                                                  "/lexical_rus.tsv")

targets = itemData.instantiateFromCSVtoitemData("file:///" + script_dir +
                                                "/utah_rus.tsv")


# Example function to analyze each sentence
def markup_sentence(s, modifiers, targets, prune_inactive=True):
    """
    """
    markup = pyConText.ConTextMarkup()
    markup.setRawText(s)
    markup.cleanText()
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.dropMarks('Exclusion')
    # apply modifiers to any targets within the modifiers scope
    markup.applyModifiers()
    markup.pruneSelfModifyingRelationships()
    if prune_inactive:
        markup.dropInactiveModifiers()
    return markup
Example No. 24
import pyConTextNLP.itemData as itemData
from eHostess.eHostInterface.KnowtatorReader import KnowtatorReader
from eHostess.Analysis.DocumentComparison import Comparison
import eHostess.Analysis.Metrics as Metrics
from eHostess.Analysis.Output import ConvertComparisonsToTSV
from eHostess.MongoDBInterface import MongoTools

# Specify the paths to the directories that contain the notes to be annotated by PyConText
noteDirectories = ['./path/to/dir1/corpus/', './path/to/dir2/corpus/']

# Process the notes using an included sentence splitter. In this case we will use the target-span splitter, which
# divides an input document up into sentences by first identifying target terms in the document and then capturing a
# configurable number of words on either side of the target term. In this case we will capture the target term, the
# ten words before the term, and the six words after the term as a single string. This will be performed on all the
# input notes.
targetsForSpanSplitter = itemData.instantiateFromCSVtoitemData(
    './path/to/targets.tsv')
pyConTextInputObject = TargetSpanSplitter.splitSentencesMultipleDocuments(
    noteDirectories, targetsForSpanSplitter, 10, 6)

# The following line executes PyConText using the default Targets and Modifiers found in
# eHostess/PyConTextInterface/TargetsAndModifiers. The user may specify a different location using the method's options.
DocumentsAnnotatedByPyConText = PyConTextInterface.PerformAnnotation(
    pyConTextInputObject)

# Now bring in the human annotation by first reading the eHost .knowtator files:
knowtatorFileDirectories = ['./path/to/dir1/saved/', './path/to/dir2/saved/']
DocumentsAnnotatedInEHost = KnowtatorReader.parseMultipleKnowtatorFiles(
    knowtatorFileDirectories)

# Compare the two sets of annotations, optionally specifying which classes are equivalent. See the documentation for
# other configuration options:
Example No. 25
        self.mode = mode
        self.dbname = dbname
        self.getDBConnection(self.dbname)

        # get reports to process
        self.cursor.execute(self.query1)
        self.reports = self.cursor.fetchall()

        print "number of reports to process", len(self.reports)
        # Create the pyConTextNLP ConTextDocument. This is the container for all the markups
        self.document = pyConText.ConTextDocument()

        self.modifiers = itemData.itemData()
        self.targets = itemData.itemData()
        for kb in lexical_kb:
            self.modifiers.extend(itemData.instantiateFromCSVtoitemData(kb))
        for kb in domain_kb:
            self.targets.extend(itemData.instantiateFromCSVtoitemData(kb))

        self.debug = debug
        if self.debug:
            print "debug set to True"
            tmp = os.path.splitext(self.dbname)
            self.debugDir = tmp[0] + "_debug_dir"
            if not os.path.exists(self.debugDir):
                os.mkdir(self.debugDir)
        else:
            self.debugDir = ''

    def readRules(self, fname):
        """read the sentence level rules"""