Exemplo n.º 1
0
 def __init__(self, repeat_steps=None, name=None):
     """Create the loop step and chain the given repeat steps inside it.

     Args:
         repeat_steps: optional iterable of steps to register, in order,
             in the inner loop workflow. ``None`` means no initial steps.
         name: passed to the parent initializer. When given, the inner
             workflow is named ``name + "_loop"``.
     """
     super().__init__(name)
     # ``name + "_loop"`` used to be evaluated unconditionally, which raised
     # TypeError for the default ``name=None``. Without a name, let Workflow
     # fall back to its own default.
     if name is None:
         self.loop_workflow = Workflow()
     else:
         self.loop_workflow = Workflow(name=name + "_loop")
     self.loop_workflow.data = dict()
     # This step and the inner workflow share one data dict.
     self.data = self.loop_workflow.data
     # A ``None`` default avoids a mutable list object shared across calls.
     if repeat_steps is None:
         repeat_steps = ()
     for step in repeat_steps:
         # Position of the step inside the loop workflow before it is appended.
         step.pos_id = len(self.loop_workflow)
         step.setWorkflow(self.loop_workflow)
         self.appendRepeatStep(step)
Exemplo n.º 2
0
 def start(self):
     """Assemble the complete annotation workflow and start it.

     The steps are created in the order they appear in the workflow, the
     resulting Workflow is stored on ``self.workflow`` and started with
     ``False`` as its only argument.
     """
     cr = ConfigReader()

     # Texts shown by the individual steps.
     intro_html = (
         '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>'
     )
     dataset_html = '<h4>Choose which dateaset you want to use: </h4>'
     types_html = (
         '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
         'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
         'for each sample. </p>'
     )
     rb_done_html = (
         '<h2>Congratuations!</h2><h4>You have finished the initial review '
         'on the rule-base preannotations. </h4>'
     )
     finish_html = '<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '

     steps = [
         IntroStep(intro_html, name='intro'),
         DBInitiater(name='db_initiator'),
         DirChooser(name='choosedir'),
         ReadFiles(name='readfiles'),
         DocsToDB(name='save2db'),
         TaskChooser(name='tasknamer'),
         DataSetChooser(name='dataset_chooser', description=dataset_html),
         AnnotationTypeDef(types_html, name='types'),
         KeywordsFiltering(name='keywords'),
         # A 'percent2filter' PreviousNextIntSlider step used to sit here; it is disabled.
         KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
         KeywordsUMLSExtender(
             name='umls_extender',
             sources=cr.getValue("umls/sources"),
             filter_by_length=cr.getValue("umls/filter_by_length"),
             filter_by_contains=cr.getValue("umls/filter_by_contains"),
             max_query=cr.getValue("umls/max_query")),
         KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
         KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
         ReviewRBInit(name="rb_review_init"),
         ReviewRBLoop(name='rb_review'),
         PreviousNextHTML(description=rb_done_html, name='rb_review_done'),
         ReviewMLInit(name='ml_review_init'),
         ReviewMLLoop(name='ml_review',
                      ml_classifier_cls=self.ml_classifier_cls),
         PreviousNextHTML(name='finish', description=finish_html),
     ]
     self.workflow = Workflow(steps)
     self.workflow.start(False)
Exemplo n.º 3
0
def evaluate(task_name='language', classifiers=[LogisticBOWClassifiers]):
    """Run the setup steps programmatically and train each classifier.

    A workflow made of the database, dataset, annotation-type, keyword and
    rule-based review-init steps is started and every step is completed in
    order. The reviewed annotations found in ``wf.samples`` are then used as
    labels to train one instance of each class in ``classifiers``.

    Args:
        task_name: assigned to the workflow and passed to every classifier.
        classifiers: classifier classes; each is called with ``task_name=``.
    """
    ConfigReader()

    db_step = DBInitiater(name='db_initiator')
    # Instantiated as before, but never added to the workflow.
    TaskChooser(name='tasknamer')
    dataset_step = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    types_step = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
        'for each sample. </p>',
        name='types')
    keywords_step = KeywordsFiltering(name='keywords')
    review_init_step = ReviewRBInit(name="rb_review_init")

    # The ML review steps (ReviewMLInit / ReviewMLLoop) are left out of this flow.
    ordered_steps = (db_step, dataset_step, types_step, keywords_step,
                     review_init_step)
    wf = Workflow(list(ordered_steps))
    wf.task_name = task_name
    wf.start()
    for step in ordered_steps:
        step.complete()

    # Print the size of every sample group.
    for label, items in wf.samples.items():
        print(label, len(items))

    docs = wf.samples['docs']
    annos = wf.samples['annos']
    reviewed_docs = {}
    for doc_id, anno in annos.items():
        if anno.REVIEWED_TYPE is not None:
            reviewed_docs[doc_id] = anno.REVIEWED_TYPE

    # NOTE(review): texts are taken from the first len(reviewed_docs) docs and
    # paired positionally with the labels; this presumably relies on docs and
    # annos sharing the same order - confirm.
    reviewed_count = len(reviewed_docs)
    texts = [doc.TEXT for doc in docs[:reviewed_count]]
    labels = list(reviewed_docs.values())
    print(labels)

    logging.getLogger().setLevel(logging.DEBUG)
    for classifier_cls in classifiers:
        model = classifier_cls(task_name=task_name)
        print("\n\nReport performance of {}:".format(classifier_cls.__name__))
        model.train(texts, labels)
Exemplo n.º 4
0
class LoopRepeatSteps(Step):
    """Wrapping multiple RepeatStep(s) in a workflow to build a mega Step"""

    def __init__(self, repeat_steps=None, name=None):
        """Create the loop step and chain the given repeat steps inside it.

        Args:
            repeat_steps: optional iterable of steps to register, in order,
                in the inner loop workflow. ``None`` means no initial steps.
            name: passed to the parent initializer. When given, the inner
                workflow is named ``name + "_loop"``.
        """
        super().__init__(name)
        # ``name + "_loop"`` used to be evaluated unconditionally, which
        # raised TypeError for the default ``name=None``. Without a name, let
        # Workflow fall back to its own default.
        if name is None:
            self.loop_workflow = Workflow()
        else:
            self.loop_workflow = Workflow(name=name + "_loop")
        self.loop_workflow.data = dict()
        # This step and the inner workflow share one data dict.
        self.data = self.loop_workflow.data
        # A ``None`` default avoids a mutable list object shared across calls.
        if repeat_steps is None:
            repeat_steps = ()
        for step in repeat_steps:
            # Position of the step inside the loop workflow before appending.
            step.pos_id = len(self.loop_workflow)
            step.setWorkflow(self.loop_workflow)
            self.appendRepeatStep(step)

    def appendRepeatStep(self, newRepeatStep):
        """Append ``newRepeatStep`` to the inner workflow and link it.

        The current last step of the loop (if any) gets ``newRepeatStep`` as
        its next step. The first step of the loop gets this step's
        ``previous_step`` as its previous step. If this step already has a
        next step, it becomes the complete step of ``newRepeatStep``.
        """
        if len(self.loop_workflow) > 0:
            previous_step = self.loop_workflow.steps[-1]
            previous_step.setNextStep(newRepeatStep)
        else:
            # presumably previous_step is provided by the Step base class
            previous_step = self.previous_step
        #  first step in the loop, set previous step to the previous step outside the loop
        if len(self.loop_workflow.steps) == 0:
            newRepeatStep.setPreviousStep(previous_step)
        #  if the loop master step has next step, assign the complete buttons of repeat steps linked to that step.
        if self.next_step is not None:
            newRepeatStep.setCompleteStep(self.next_step)

        self.loop_workflow.append(newRepeatStep)

    def setNextStep(self, next_step):
        """Set the next step and make it the complete step of every repeat step."""
        # need to update every embedded repeat steps
        for repeat_step in self.loop_workflow.steps:
            repeat_step.setCompleteStep(next_step)
        super().setNextStep(next_step)

    def start(self):
        """Start the inner loop workflow."""
        self.loop_workflow.start()
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef
from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.utils.ReviewRBInit import ReviewRBInit
from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop
from SmartAnno.utils.ReviewMLInit import ReviewMLInit
from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop
from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier
from SmartAnno.utils.DataSetChooser import DataSetChooser

# Verbose logging for the whole script.
logging.getLogger().setLevel(logging.DEBUG)

# Load the configuration before anything reads from ConfigReader.
ConfigReader('../conf/smartanno_conf.json')

# Workflow backed by the test sqlite database.
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao(
    'sqlite+pysqlite:///../data/test.sqlite',
    sqlalchemy_dao.POOL_DISABLED,
)
wf.task_name = 'language'

# Step 1: define the annotation types.
types_step = AnnotationTypeDef(
    '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
    'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
    'for each sample. </p>',
    name='types',
)
wf.append(types_step)

# Step 2: keyword filters.
keywords_step = KeywordsFiltering(name='keywords')
wf.append(keywords_step)

# Step 3: dataset selection.
dataset_step = DataSetChooser(
    name='dataset_chooser',
    description='<h4>Choose which dateaset you want to use: </h4>',
)
wf.append(dataset_step)
from threading import Thread


def prepareGloveModel():
    """Load the configuration and construct the GloveModel from it.

    Reads the 'glove/model_path', 'glove/vocab' and 'glove/vector' settings
    and passes them to the GloveModel constructor.
    """
    ConfigReader('../conf/smartanno_conf.json')
    GloveModel(
        word2vec_file=ConfigReader.getValue('glove/model_path'),
        vocab=ConfigReader.getValue('glove/vocab'),
        vect=ConfigReader.getValue('glove/vector'),
    )
    # Read the class-level model reference, as before; the value is not used here.
    _ = GloveModel.glove_model


# Build the GloveModel on a separate thread while the workflow is assembled below.
thread_gm = Thread(target=prepareGloveModel)
thread_gm.start()

# NOTE(review): ConfigReader is read here while prepareGloveModel() may still
# be initialising it on the other thread; this presumably relies on an earlier
# ConfigReader(...) call that is not visible in this snippet - confirm.
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
# Test sqlite database with connection pooling disabled.
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite',
             sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'
# Steps are appended in the order they should run.
wf.append(
    AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
        'for each sample. </p>',
        name='types'))
wf.append(KeywordsFiltering(name='keywords'))
wf.append(KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'))
wf.append(KeywordsEmbeddingExtender(name='w_e_extender', max_query=40))

# The thread is never joined before the workflow starts.
wf.start()
Exemplo n.º 7
0
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.IntroStep import IntroStep
# Load the alternative configuration file used by this example.
ConfigReader('../conf/smartanno_conf2.json')

# Two-step workflow: the intro step followed directly by the final page.
intro = IntroStep(
    '<h2>Welcome to SmartAnno!</h2><h4>First, let&apos;s import txt data from a directory. </h4>',
    name='intro',
)
finish_step = PreviousNextHTML(
    name='finish',
    description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. ',
)
wf = Workflow([intro, finish_step])
wf.start()

# Programmatically follow the first branch button of the intro step.
first_branch = intro.branch_buttons[0]
intro.navigate(first_branch)
Exemplo n.º 8
0
class Main:
    """Define and execute a workflow"""

    def __init__(self, ml_classifier_cls=LogisticBOWClassifier):
        """Initialise state and inject the notebook output styling.

        Args:
            ml_classifier_cls: classifier class handed to the ReviewMLLoop
                step when the workflow is built in ``start``.
        """
        # ``self.data`` was assigned twice; once is enough.
        self.data = None
        self.dir_chooser = None
        self.status = NotTrained
        # Stays None until start() builds the workflow.
        self.workflow = None
        self.ml_classifier_cls = ml_classifier_cls
        self.__setUpStage()

    def __setUpStage(self):
        """Display a CSS snippet that adjusts the output area height and scroll shadow."""
        style = '''<style>.output_wrapper, .output {
                    height:auto !important;
                    max-height:1000px;  /* your desired max-height here */
                }
                .output_scroll {
                    box-shadow:none !important;
                    webkit-box-shadow:none !important;
                }
                </style>'''
        display(widgets.HTML(style))

    def start(self):
        """Build the full annotation workflow, store it on ``self.workflow`` and start it."""
        cr = ConfigReader()
        self.workflow = Workflow([
            IntroStep(
                '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>',
                name='intro'),
            DBInitiater(name='db_initiator'),
            DirChooser(name='choosedir'),
            ReadFiles(name='readfiles'),
            DocsToDB(name='save2db'),
            TaskChooser(name='tasknamer'),
            DataSetChooser(
                name='dataset_chooser',
                description='<h4>Choose which dateaset you want to use: </h4>'
            ),
            AnnotationTypeDef(
                '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
                'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
                'for each sample. </p>',
                name='types'),
            KeywordsFiltering(name='keywords'),
            # A 'percent2filter' PreviousNextIntSlider step used to sit here; it is disabled.
            KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
            KeywordsUMLSExtender(
                name='umls_extender',
                sources=cr.getValue("umls/sources"),
                filter_by_length=cr.getValue("umls/filter_by_length"),
                filter_by_contains=cr.getValue("umls/filter_by_contains"),
                max_query=cr.getValue("umls/max_query")),
            KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
            KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
            ReviewRBInit(name="rb_review_init"),
            ReviewRBLoop(name='rb_review'),
            PreviousNextHTML(
                description=
                '<h2>Congratuations!</h2><h4>You have finished the initial review '
                'on the rule-base preannotations. </h4>',
                name='rb_review_done'),
            ReviewMLInit(name='ml_review_init'),
            ReviewMLLoop(name='ml_review',
                         ml_classifier_cls=self.ml_classifier_cls),
            # NOTE(review): the <h4> in this description is never closed.
            PreviousNextHTML(
                name='finish',
                description=
                '<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '
            )
        ])
        self.workflow.start(False)

    def getData(self):
        """Return the workflow's data, or None if the workflow has not been started."""
        # Before start() the workflow is None; avoid an AttributeError.
        if self.workflow is None:
            return None
        return self.workflow.data

    def getLastStepData(self):
        """Return the data entry at index ``len(data) - 1``.

        Returns None when the workflow has not been started or holds no data.
        """
        if self.workflow is None:
            return None
        length = len(self.workflow.data)
        if length > 0:
            return self.workflow.data[length - 1]
        return None

    def navigate(self):
        """Placeholder; not implemented yet."""
        # TODO
        pass
Exemplo n.º 9
0
    def testRBLoop(self):
        """Drive the rule-based review init/loop steps against the demo database.

        Builds a three-step workflow, presets the workflow attributes the
        review steps read, starts it, and prints the sampled document ids and
        their reviewed types before navigating to the second branch button.
        """
        logging.getLogger().setLevel(logging.WARN)

        ConfigReader()
        workflow = Workflow()
        review_init = ReviewRBInit(name="rb_review_init")
        workflow.append(review_init)
        review_loop = ReviewRBLoop(name='rb_review')
        workflow.append(review_loop)
        closing_page = PreviousNextHTML(
            '<h2>Welcome to SmartAnno!</h2><h4>First, let&apos;s import txt data from a directory. </h4>',
            name='intro')
        workflow.append(closing_page)

        # State normally produced by earlier workflow steps.
        workflow.filters = {'TypeA': ['heart'], 'TypeB': ['exam']}
        workflow.types = ['TypeA', 'TypeB']
        workflow.task_id = 1
        workflow.umls_extended = {}
        workflow.we_extended = {}
        workflow.dao = Dao('sqlite+pysqlite:///data/demo.sqlite',
                           sqlalchemy_dao.POOL_DISABLED)
        workflow.start()

        if len(review_init.branch_buttons) == 0:
            # if no records in the db, the optional buttons won't show
            review_init.sample_size_input.value = 3
            review_init.complete()
            workflow.start()

        doc_ids = [doc.DOC_ID for doc in review_init.data['docs']]
        print(doc_ids)
        first_step_annos = workflow.steps[0].data['annos']
        reviewed_types = [anno.REVIEWED_TYPE for anno in first_step_annos.values()]
        print(reviewed_types)

        review_init.sample_size_input.value = 1
        second_branch = review_init.branch_buttons[1]
        review_init.navigate(second_branch)
Exemplo n.º 10
0
from sqlalchemy_dao import Dao

from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.db.ORMs import Filter
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef
from SmartAnno.utils.IntroStep import IntroStep
from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering
from SmartAnno.utils.KeywordsUMLSExtender import KeywordsUMLSExtender
from SmartAnno.utils.KeywordsUMLSExtenderSetup import KeywordsUMLSExtenderSetup

# Verbose logging for the whole script.
logging.getLogger().setLevel(logging.DEBUG)

# Load the configuration before anything reads from ConfigReader.
ConfigReader('../conf/smartanno_conf.json')

# Workflow backed by the test sqlite database.
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao(
    'sqlite+pysqlite:///../data/test.sqlite',
    sqlalchemy_dao.POOL_DISABLED,
)
wf.task_name = 'language'

# Annotation type definition step.
types_step = AnnotationTypeDef(
    '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
    'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
    'for each sample. </p>',
    name='types',
)
wf.append(types_step)

# Keyword filter step.
keywords_step = KeywordsFiltering(name='keywords')
wf.append(keywords_step)

# UMLS extension: setup page first, then the extender itself.
umls_setup_step = KeywordsUMLSExtenderSetup(name='umls_extender_setup')
wf.append(umls_setup_step)

# Extender settings are read from the 'umls/*' configuration entries.
umls_options = {
    'sources': ConfigReader.getValue("umls/sources"),
    'filter_by_length': ConfigReader.getValue("umls/filter_by_length"),
    'filter_by_contains': ConfigReader.getValue("umls/filter_by_contains"),
    'max_query': ConfigReader.getValue("umls/max_query"),
}
umls_step = KeywordsUMLSExtender(name='umls_extender', **umls_options)
wf.append(umls_step)
wf.append(
Exemplo n.º 11
0
from SmartAnno.gui.DirChooser import DirChooser
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.FileIO import ReadFiles
from SmartAnno.gui.PreviousNextWidgets import PreviousNext
import glob
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.DBInitiater import DBInitiater
from SmartAnno.utils.DocsToDB import DocsToDB
import pandas as pd

from SmartAnno.utils.IntroStep import IntroStep

# Initialise the configuration with ConfigReader's defaults.
ConfigReader()
# Import pipeline: initialise the database, choose a directory, read the files
# and store the documents.
wf = Workflow([
    DBInitiater(name='db_initiator'),
    DirChooser(name='choosedir'),
    ReadFiles(name='readfiles'),
    DocsToDB(name='save2db')
])
wf.to_continue = False

# Set the remove_old flag on the file-reading step before the workflow starts.
wf.getStepByName('readfiles').remove_old = True
#
# wf.getStepByName('db_initiator').need_import = True
wf.start()
# From here on the steps are driven programmatically; the order of the
# following statements matters.
wf.getStepByName('db_initiator').toggle.value = 'Yes'
# NOTE(review): Dao and sqlalchemy_dao are not imported in the visible part of
# this snippet - confirm they are in scope.
wf.dao = Dao('sqlite+pysqlite:///../data/test2.sqlite',
             sqlalchemy_dao.POOL_DISABLED)
wf.dbpath = '../data/test2.sqlite'
wf.getStepByName('db_initiator').complete()
# Hard-coded, machine-specific input directory.
wf.getStepByName(
    'choosedir').path = '/home/brokenjade/Documents/N2C2/smalltest/'