def __init__(self, repeat_steps=None, name=None):
    """Create the inner loop workflow and register the given repeat steps.

    Args:
        repeat_steps: Iterable of steps to chain inside the loop, in order.
            ``None`` (the default) registers no steps.
        name: Name forwarded to the parent initializer. When given, the
            inner workflow is named ``name + "_loop"``.
    """
    super().__init__(name)
    # `name + "_loop"` raised TypeError for the default name=None, so fall
    # back to a plain "loop" label when no name was supplied.
    loop_name = "loop" if name is None else name + "_loop"
    self.loop_workflow = Workflow(name=loop_name)
    self.loop_workflow.data = dict()
    # This step and its inner workflow share one and the same data dict.
    self.data = self.loop_workflow.data
    if repeat_steps is None:
        repeat_steps = ()
    for step in repeat_steps:
        # Position of the step inside the loop workflow before it is appended.
        step.pos_id = len(self.loop_workflow)
        step.setWorkflow(self.loop_workflow)
        self.appendRepeatStep(step)
def start(self):
    """Assemble the full annotation workflow and start it.

    The workflow is stored on ``self.workflow``. UMLS extender settings are
    read from the configuration through ``ConfigReader``; the machine
    learning review loop uses ``self.ml_classifier_cls``.
    """
    cr = ConfigReader()
    self.workflow = Workflow([
        IntroStep(
            '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>',
            name='intro'),
        DBInitiater(name='db_initiator'),
        DirChooser(name='choosedir'),
        ReadFiles(name='readfiles'),
        DocsToDB(name='save2db'),
        TaskChooser(name='tasknamer'),
        DataSetChooser(
            name='dataset_chooser',
            description='<h4>Choose which dateaset you want to use: </h4>'),
        # The middle fragment is double-quoted because it contains an
        # apostrophe ("won't"), which would end a single-quoted literal.
        AnnotationTypeDef(
            '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
            "have too many types, try set up them separately, so that you won't need to choose from a long list "
            'for each sample. </p>',
            name='types'),
        KeywordsFiltering(name='keywords'),
        # PreviousNextIntSlider(value=60, min=0, max=100, step=10,
        #                       description='<h4>Percentage to Filter: </h4><p>Choose how many percent of the samples '
        #                                   'you want to use the keywords filter.</p>', name='percent2filter'),
        KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
        KeywordsUMLSExtender(
            name='umls_extender',
            sources=cr.getValue("umls/sources"),
            filter_by_length=cr.getValue("umls/filter_by_length"),
            filter_by_contains=cr.getValue("umls/filter_by_contains"),
            max_query=cr.getValue("umls/max_query")),
        KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
        KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
        ReviewRBInit(name="rb_review_init"),
        ReviewRBLoop(name='rb_review'),
        PreviousNextHTML(
            description='<h2>Congratuations!</h2><h4>You have finished the initial review '
                        'on the rule-base preannotations. </h4>',
            name='rb_review_done'),
        ReviewMLInit(name='ml_review_init'),
        ReviewMLLoop(name='ml_review', ml_classifier_cls=self.ml_classifier_cls),
        PreviousNextHTML(
            name='finish',
            description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '),
    ])
    self.workflow.start(False)
def evaluate(task_name='language', classifiers=(LogisticBOWClassifiers,)):
    """Train each classifier on the reviewed samples of a task.

    Builds a small workflow, completes its steps programmatically, collects
    the reviewed annotations from ``wf.samples`` and calls ``train`` on an
    instance of every class in ``classifiers``.

    Args:
        task_name: Task name assigned to the workflow and passed to each
            classifier constructor.
        classifiers: Iterable of classifier classes; each is instantiated as
            ``cl(task_name=task_name)``.
    """
    ConfigReader()
    dbi = DBInitiater(name='db_initiator')
    # NOTE(review): `tc` is constructed but never added to the workflow;
    # kept in case the constructor has side effects -- confirm.
    tc = TaskChooser(name='tasknamer')
    dsc = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    # The middle fragment is double-quoted because it contains an apostrophe
    # ("won't"), which would end a single-quoted literal.
    anno_type = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types')
    kf = KeywordsFiltering(name='keywords')
    ri = ReviewRBInit(name="rb_review_init")
    # mi=ReviewMLInit(name='ml_review_init')
    # ml=ReviewMLLoop(name='ml_review', ml_classifier_cls=SVMBOWClassifier)
    wf = Workflow([
        dbi, dsc, anno_type, kf, ri
        # ,mi,ml
    ])
    wf.task_name = task_name
    wf.start()

    # Drive every step to completion without user interaction.
    dbi.complete()
    dsc.complete()
    anno_type.complete()
    kf.complete()
    ri.complete()

    for key, value in wf.samples.items():
        print(key, len(value))

    docs = wf.samples['docs']
    annos = wf.samples['annos']
    # Keep only the annotations that already carry a reviewed type.
    reviewed_docs = {
        doc_id: anno.REVIEWED_TYPE
        for doc_id, anno in annos.items()
        if anno.REVIEWED_TYPE is not None
    }
    # NOTE(review): this pairs the first len(reviewed_docs) documents with the
    # reviewed labels by position; it presumes those documents are exactly
    # the reviewed ones, in the same order as `annos` -- confirm.
    x = [doc.TEXT for doc in docs[:len(reviewed_docs)]]
    y = list(reviewed_docs.values())
    print(y)

    logging.getLogger().setLevel(logging.DEBUG)
    for cl in classifiers:
        cl_instance = cl(task_name=task_name)
        print("\n\nReport performance of {}:".format(cl.__name__))
        cl_instance.train(x, y)
class LoopRepeatSteps(Step):
    """Wrapping multiple RepeatStep(s) in a workflow to build a mega Step"""

    def __init__(self, repeat_steps=None, name=None):
        """Create the inner loop workflow and register the given repeat steps.

        Args:
            repeat_steps: Iterable of steps to chain inside the loop, in
                order. ``None`` (the default) registers no steps.
            name: Name forwarded to the parent initializer. When given, the
                inner workflow is named ``name + "_loop"``.
        """
        super().__init__(name)
        # `name + "_loop"` raised TypeError for the default name=None, so
        # fall back to a plain "loop" label when no name was supplied.
        loop_name = "loop" if name is None else name + "_loop"
        self.loop_workflow = Workflow(name=loop_name)
        self.loop_workflow.data = dict()
        # This step and its inner workflow share one and the same data dict.
        self.data = self.loop_workflow.data
        if repeat_steps is None:
            repeat_steps = ()
        for step in repeat_steps:
            step.pos_id = len(self.loop_workflow)
            step.setWorkflow(self.loop_workflow)
            self.appendRepeatStep(step)

    def appendRepeatStep(self, newRepeatStep):
        """Link ``newRepeatStep`` after the current last loop step and append it.

        Args:
            newRepeatStep: The step to add to the end of the inner workflow.
        """
        if len(self.loop_workflow) > 0:
            previous_step = self.loop_workflow.steps[-1]
            previous_step.setNextStep(newRepeatStep)
        else:
            previous_step = self.previous_step
        # first step in the loop, set previous step to the previous step outside the loop
        if len(self.loop_workflow.steps) == 0:
            newRepeatStep.setPreviousStep(previous_step)
        # if the loop master step has next step, assign the complete buttons of repeat steps linked to that step.
        if self.next_step is not None:
            newRepeatStep.setCompleteStep(self.next_step)
        self.loop_workflow.append(newRepeatStep)

    def setNextStep(self, next_step):
        """Set the step following the loop and propagate it to every inner step.

        Args:
            next_step: The step that follows this loop step.
        """
        # need to update every embedded repeat steps
        for repeat_step in self.loop_workflow.steps:
            repeat_step.setCompleteStep(next_step)
        super().setNextStep(next_step)

    def start(self):
        """Start the inner loop workflow."""
        self.loop_workflow.start()
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef
from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.utils.ReviewRBInit import ReviewRBInit
from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop
from SmartAnno.utils.ReviewMLInit import ReviewMLInit
from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop
from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier
from SmartAnno.utils.DataSetChooser import DataSetChooser

# Script: build a workflow against the test SQLite database with the
# annotation-type, keyword-filter and dataset-chooser steps.
logging.getLogger().setLevel(logging.DEBUG)

ConfigReader('../conf/smartanno_conf.json')
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite',
             sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'

# The middle fragment is double-quoted because it contains an apostrophe
# ("won't"), which would end a single-quoted literal.
wf.append(
    AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types'))
wf.append(KeywordsFiltering(name='keywords'))
wf.append(
    DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>'))
from threading import Thread


def prepareGloveModel():
    """Instantiate ``GloveModel`` from the paths configured under ``glove/*``.

    Reads ``glove/model_path``, ``glove/vocab`` and ``glove/vector`` through
    ``ConfigReader`` after pointing it at ``../conf/smartanno_conf.json``.
    Used below as the target of a background thread.
    """
    ConfigReader('../conf/smartanno_conf.json')
    glove_path = ConfigReader.getValue('glove/model_path')
    glove_vocab = ConfigReader.getValue('glove/vocab')
    glove_vector = ConfigReader.getValue('glove/vector')
    GloveModel(word2vec_file=glove_path, vocab=glove_vocab, vect=glove_vector)
    # Kept from the original; the value is not used further in this function.
    gm = GloveModel.glove_model


# Run the model preparation in the background while the workflow is built.
thread_gm = Thread(target=prepareGloveModel)
thread_gm.start()

# NOTE(review): in the code shown here ConfigReader is only initialised inside
# the thread above, so the reads below may run before that initialisation
# finishes -- confirm ConfigReader is set up earlier or tolerates this.
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite',
             sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'

# The middle fragment is double-quoted because it contains an apostrophe
# ("won't"), which would end a single-quoted literal.
wf.append(
    AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types'))
wf.append(KeywordsFiltering(name='keywords'))
wf.append(KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'))
wf.append(KeywordsEmbeddingExtender(name='w_e_extender', max_query=40))
wf.start()
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.IntroStep import IntroStep

# Script: run a two-step workflow (intro page, finish page) and then
# navigate using the intro step's first branch button.
ConfigReader('../conf/smartanno_conf2.json')

# Double-quoted because the text contains an apostrophe ("let's"), which
# would end a single-quoted literal.
intro = IntroStep(
    "<h2>Welcome to SmartAnno!</h2><h4>First, let's import txt data from a directory. </h4>",
    name='intro')
wf = Workflow([
    intro,
    PreviousNextHTML(
        name='finish',
        description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '),
])
wf.start()
intro.navigate(intro.branch_buttons[0])
class Main:
    """Define and execute a workflow"""

    def __init__(self, ml_classifier_cls=LogisticBOWClassifier):
        """Initialise state and inject the notebook output styling.

        Args:
            ml_classifier_cls: Classifier class handed to the machine
                learning review loop when the workflow is built in ``start``.
        """
        self.data = None
        self.dir_chooser = None
        self.status = NotTrained
        # Populated by start().
        self.workflow = None
        self.ml_classifier_cls = ml_classifier_cls
        self.__setUpStage()

    def __setUpStage(self):
        """Display a ``<style>`` block that overrides the output area CSS."""
        style = '''<style>.output_wrapper, .output {
                    height:auto !important;
                    max-height:1000px;  /* your desired max-height here */
                }
                .output_scroll {
                    box-shadow:none !important;
                    webkit-box-shadow:none !important;
                }
                </style>'''
        display(widgets.HTML(style))

    def start(self):
        """Assemble the full annotation workflow and start it.

        The workflow is stored on ``self.workflow``. UMLS extender settings
        are read from the configuration through ``ConfigReader``.
        """
        cr = ConfigReader()
        self.workflow = Workflow([
            IntroStep(
                '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>',
                name='intro'),
            DBInitiater(name='db_initiator'),
            DirChooser(name='choosedir'),
            ReadFiles(name='readfiles'),
            DocsToDB(name='save2db'),
            TaskChooser(name='tasknamer'),
            DataSetChooser(
                name='dataset_chooser',
                description='<h4>Choose which dateaset you want to use: </h4>'),
            # The middle fragment is double-quoted because it contains an
            # apostrophe ("won't"), which would end a single-quoted literal.
            AnnotationTypeDef(
                '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
                "have too many types, try set up them separately, so that you won't need to choose from a long list "
                'for each sample. </p>',
                name='types'),
            KeywordsFiltering(name='keywords'),
            # PreviousNextIntSlider(value=60, min=0, max=100, step=10,
            #                       description='<h4>Percentage to Filter: </h4><p>Choose how many percent of the samples '
            #                                   'you want to use the keywords filter.</p>', name='percent2filter'),
            KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
            KeywordsUMLSExtender(
                name='umls_extender',
                sources=cr.getValue("umls/sources"),
                filter_by_length=cr.getValue("umls/filter_by_length"),
                filter_by_contains=cr.getValue("umls/filter_by_contains"),
                max_query=cr.getValue("umls/max_query")),
            KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
            KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
            ReviewRBInit(name="rb_review_init"),
            ReviewRBLoop(name='rb_review'),
            PreviousNextHTML(
                description='<h2>Congratuations!</h2><h4>You have finished the initial review '
                            'on the rule-base preannotations. </h4>',
                name='rb_review_done'),
            ReviewMLInit(name='ml_review_init'),
            ReviewMLLoop(name='ml_review', ml_classifier_cls=self.ml_classifier_cls),
            PreviousNextHTML(
                name='finish',
                description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '),
        ])
        self.workflow.start(False)

    def getData(self):
        """Return the ``data`` attribute of the current workflow."""
        return self.workflow.data

    def getLastStepData(self):
        """Return ``workflow.data[len - 1]``, or ``None`` when data is empty."""
        length = len(self.workflow.data)
        if length > 0:
            return self.workflow.data[length - 1]
        else:
            return None

    def navigate(self):
        """Placeholder; not implemented yet."""
        # TODO
        pass
def testRBLoop(self):
    """Exercise the rule-based review init/loop steps against the demo DB.

    Builds a three-step workflow, seeds it with filters and types, starts it
    and then drives the ``ReviewRBInit`` step programmatically.
    """
    logging.getLogger().setLevel(logging.WARN)
    ConfigReader()
    wf = Workflow()
    rb = ReviewRBInit(name="rb_review_init")
    wf.append(rb)
    rv = ReviewRBLoop(name='rb_review')
    wf.append(rv)
    # Double-quoted because the text contains an apostrophe ("let's"), which
    # would end a single-quoted literal.
    wf.append(
        PreviousNextHTML(
            "<h2>Welcome to SmartAnno!</h2><h4>First, let's import txt data from a directory. </h4>",
            name='intro'))
    wf.filters = {'TypeA': ['heart'], 'TypeB': ['exam']}
    wf.types = ['TypeA', 'TypeB']
    wf.task_id = 1
    wf.umls_extended = {}
    wf.we_extended = {}
    wf.dao = Dao('sqlite+pysqlite:///data/demo.sqlite',
                 sqlalchemy_dao.POOL_DISABLED)
    wf.start()
    if len(rb.branch_buttons) == 0:
        # if no records in the db, the optional buttons won't show
        rb.sample_size_input.value = 3
        rb.complete()
        wf.start()
    print([doc.DOC_ID for doc in rb.data['docs']])
    print([
        anno.REVIEWED_TYPE for anno in wf.steps[0].data['annos'].values()
    ])
    rb.sample_size_input.value = 1
    rb.navigate(rb.branch_buttons[1])
from sqlalchemy_dao import Dao from SmartAnno.utils.ConfigReader import ConfigReader from SmartAnno.db.ORMs import Filter from SmartAnno.gui.Workflow import Workflow from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef from SmartAnno.utils.IntroStep import IntroStep from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering from SmartAnno.utils.KeywordsUMLSExtender import KeywordsUMLSExtender from SmartAnno.utils.KeywordsUMLSExtenderSetup import KeywordsUMLSExtenderSetup logging.getLogger().setLevel(logging.DEBUG) ConfigReader('../conf/smartanno_conf.json') wf = Workflow(config_file=ConfigReader.config_file) wf.api_key = ConfigReader.getValue("api_key") wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite', sqlalchemy_dao.POOL_DISABLED) wf.task_name = 'language' wf.append(AnnotationTypeDef( '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you' 'have too many types, try set up them separately, so that you won't need to choose from a long list ' 'for each sample. </p>', name='types')) wf.append(KeywordsFiltering( name='keywords')) wf.append(KeywordsUMLSExtenderSetup(name='umls_extender_setup')) wf.append(KeywordsUMLSExtender(name='umls_extender', sources=ConfigReader.getValue("umls/sources"), filter_by_length=ConfigReader.getValue("umls/filter_by_length"), filter_by_contains=ConfigReader.getValue("umls/filter_by_contains"), max_query=ConfigReader.getValue("umls/max_query"))) wf.append(
from SmartAnno.gui.DirChooser import DirChooser
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.FileIO import ReadFiles
from SmartAnno.gui.PreviousNextWidgets import PreviousNext
import glob
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.DBInitiater import DBInitiater
from SmartAnno.utils.DocsToDB import DocsToDB
import pandas as pd
from SmartAnno.utils.IntroStep import IntroStep

# Script: run the import pipeline (DB init -> directory chooser -> file
# reader -> save to DB) against the test2 SQLite database.
ConfigReader()

import_steps = [
    DBInitiater(name='db_initiator'),
    DirChooser(name='choosedir'),
    ReadFiles(name='readfiles'),
    DocsToDB(name='save2db'),
]
wf = Workflow(import_steps)
wf.to_continue = False
wf.getStepByName('readfiles').remove_old = True
#
# wf.getStepByName('db_initiator').need_import = True
wf.start()

# Answer the DB initiator's toggle, then point the workflow at the test DB
# before completing that step.
wf.getStepByName('db_initiator').toggle.value = 'Yes'
pool_mode = sqlalchemy_dao.POOL_DISABLED
wf.dao = Dao('sqlite+pysqlite:///../data/test2.sqlite', pool_mode)
wf.dbpath = '../data/test2.sqlite'
wf.getStepByName('db_initiator').complete()

# Pre-set the directory the chooser step should use.
dir_step = wf.getStepByName('choosedir')
dir_step.path = '/home/brokenjade/Documents/N2C2/smalltest/'