def prepareGloveModel():
    """Load the SmartAnno configuration and construct the GloVe model.

    Reads the ``glove/model_path``, ``glove/vocab`` and ``glove/vector``
    settings and hands them to ``GloveModel``.
    """
    ConfigReader('../conf/smartanno_conf.json')
    settings = {
        'word2vec_file': ConfigReader.getValue('glove/model_path'),
        'vocab': ConfigReader.getValue('glove/vocab'),
        'vect': ConfigReader.getValue('glove/vector'),
    }
    GloveModel(**settings)
    # Read the class-level model attribute, exactly as the original code did.
    model = GloveModel.glove_model
示例#2
0
    def start(self):
        """Show the word-embedding extension widgets for this step.

        When ``glove/model_path`` is not configured (``None`` or empty) the
        step at ``pos_id + 2`` is started instead and nothing is displayed.
        """
        glove_path = ConfigReader.getValue("glove/model_path")
        if glove_path is None or len(glove_path) == 0:
            self.workflow.steps[self.pos_id + 2].start()
            return

        if not hasattr(self.workflow, 'we_extended'):
            self.workflow.we_extended = dict()
        rows = self.showWords(self.workflow.filters)
        # ``flex_grown`` is not a Layout trait (it was silently dropped);
        # ``flex_flow`` is the trait that selects the column direction.
        self.box = widgets.VBox(rows, layout=widgets.Layout(display='flex', flex_flow='column'))
        display(self.box)
 def loadDefaultConfig(self):
     """Fill unset UMLS options with defaults and save the configuration.

     Every option that falls back to its default is also written to the
     matching ``umls/*`` configuration key; the configuration is saved at
     the end regardless of whether anything changed.
     """
     if self.sources is None or len(self.sources) == 0:
         self.sources = ['SNOMEDCT_US']
         ConfigReader.setValue("umls/sources", self.sources)

     # (attribute name, configuration key, default value)
     fallbacks = (
         ('filter_by_length', "umls/filter_by_length", 0),
         ('filter_by_contains', "umls/filter_by_contains", True),
         ('max_query', "umls/max_query", 50),
     )
     for attr_name, config_key, default in fallbacks:
         if getattr(self, attr_name) is None:
             setattr(self, attr_name, default)
             ConfigReader.setValue(config_key, default)

     ConfigReader.saveConfig()
示例#4
0
 def __init__(self, name=None):
     """Set up the step state and the sentence-splitter toggle widget.

     Args:
         name: Step name forwarded to the parent constructor.
     """
     super().__init__(name)
     self.dao = None
     self.dbpath = ''
     self.remove_old = False
     self.dataset_name = 'orig'
     self.whoosh_root = ConfigReader.getValue("whoosh/root_path")
     self.html1 = widgets.HTML('<h4>Give a name for this dataset: </h4>')
     self.dataset_name_input = None
     self.html2 = None

     # (option label, tooltip) pairs, in display order.
     splitter_choices = (
         ('TextBlob_Splitter', 'Use TextBlob sentence splitter'),
         ('PyRuSh', 'Use PyRuSH to split sentences'),
         ('Not_To_Split', "don't split"),
     )
     self.toggle = widgets.ToggleButtons(
         options=[label for label, _ in splitter_choices],
         description='',
         disabled=False,
         value='Not_To_Split',
         button_style='',  # 'success', 'info', 'warning', 'danger' or ''
         tooltips=[hint for _, hint in splitter_choices],
         layout=widgets.Layout(width='70%'))
     self.data_step = None
示例#5
0
    def testRBLoop(self):
        """Drive a small rule-based review workflow against the demo database."""
        logging.getLogger().setLevel(logging.WARN)

        ConfigReader()
        workflow = Workflow()
        init_step = ReviewRBInit(name="rb_review_init")
        workflow.append(init_step)
        loop_step = ReviewRBLoop(name='rb_review')
        workflow.append(loop_step)
        intro_page = PreviousNextHTML(
            '<h2>Welcome to SmartAnno!</h2><h4>First, let&apos;s import txt data from a directory. </h4>',
            name='intro')
        workflow.append(intro_page)

        # Workflow state that the review steps read.
        workflow.filters = {'TypeA': ['heart'], 'TypeB': ['exam']}
        workflow.types = ['TypeA', 'TypeB']
        workflow.task_id = 1
        workflow.umls_extended = {}
        workflow.we_extended = {}
        workflow.dao = Dao('sqlite+pysqlite:///data/demo.sqlite',
                           sqlalchemy_dao.POOL_DISABLED)
        workflow.start()
        if len(init_step.branch_buttons) == 0:
            # if no records in the db, the optional buttons won't show
            init_step.sample_size_input.value = 3
            init_step.complete()
            workflow.start()

        doc_ids = [doc.DOC_ID for doc in init_step.data['docs']]
        print(doc_ids)
        reviewed_types = [
            anno.REVIEWED_TYPE
            for anno in workflow.steps[0].data['annos'].values()
        ]
        print(reviewed_types)
        init_step.sample_size_input.value = 1
        init_step.navigate(init_step.branch_buttons[1])
示例#6
0
    def __init__(self,
                 description='',
                 name=str(Step.global_id + 1),
                 ml_classifier_cls=LogisticBOWClassifier):
        """Initialise the step state before delegating to the parent class.

        Args:
            description: Accepted for interface compatibility; not used here.
            name: Step name forwarded to the parent constructor.
            ml_classifier_cls: Classifier class stored on the instance.
        """
        # Attributes that start out unset.
        for unset_attr in ('sample_size_input', 'percent_slider', 'box',
                           'data', 'docs', 'annos', 'reviewed_docs',
                           'reviewed_pos', 'leftover'):
            setattr(self, unset_attr, None)

        self.samples = {"contain": [], "notcontain": []}
        self.ready = False
        # reset, continue, addmore,
        self.move_next_option = ''

        self.previousReviewed = OrderedDict()
        self.learning_pace = ConfigReader.getValue('review/ml_learning_pace')
        self.un_reviewed = 0
        self.parameters = {}
        self.parameter_inputs = {}
        self.ml_classifier_cls = ml_classifier_cls
        super().__init__(name=name)
    def __init__(self,
                 targets=None,
                 modifiers=None,
                 feature_inference_rule=None,
                 document_inference_rule=None,
                 rush_rule=None,
                 expected_values=[],
                 save_markups=True):
        """Build the rule-based document classifier and register it as the shared instance.

        Args:
            targets: Target rules, either a rule file path / rule string or
                an already-built object passed to ``setModifiersTargets``.
            modifiers: Modifier rules, same conventions as ``targets``.
            feature_inference_rule: Passed to ``FeatureInferencer``.
            document_inference_rule: Passed to ``DocumentInferencer``.
            rush_rule: Path to a RuSH rule file. When missing or not a file,
                the ``rush_rules_path`` configuration value is used instead.
            expected_values: Values that are stored lower-cased. The default
                list is only read, never mutated.
            save_markups: Stored on the instance as ``save_markups``.
        """
        self.document_inferencer = DocumentInferencer(document_inference_rule)
        self.feature_inferencer = FeatureInferencer(feature_inference_rule)
        self.conclusions = []
        self.modifiers = modifiers
        self.targets = targets
        self.save_markups = save_markups
        self.expected_values = [value.lower() for value in expected_values]
        self.saved_markups_map = dict()
        self.pyrush = None
        if rush_rule is None or not os.path.isfile(rush_rule):
            rush_rule = ConfigReader.getValue('rush_rules_path')
        if rush_rule is not None and os.path.isfile(rush_rule):
            self.pyrush = RuSH(rush_rule)
        else:
            # os.path.abspath(None) raises TypeError, so only resolve the
            # path when a rule path is actually configured.
            missing_path = os.path.abspath(rush_rule) if rush_rule is not None else None
            logMsg(("File not found", missing_path))
        self.last_doc_name = ''

        if modifiers is not None and targets is not None:
            if isinstance(modifiers, str) and isinstance(targets, str):
                rule_file_suffixes = ('.csv', '.tsv', '.txt', '.yml')
                # Targets may also be given inline, starting with 'Lex\t'.
                if modifiers.endswith(rule_file_suffixes) \
                        and (targets.endswith(rule_file_suffixes) or targets.startswith('Lex\t')):
                    self.setModifiersTargetsFromFiles(modifiers, targets)
            else:
                self.setModifiersTargets(modifiers, targets)
        RBDocumentClassifier.instance = self
示例#8
0
 def __init__(self, apikey=None):
     """Store the API key and the service URL.

     Args:
         apikey: Key to use; when ``None`` the ``api_key`` configuration
             value is read instead.
     """
     self.apikey = apikey if apikey is not None else ConfigReader.getValue('api_key')
     self.service = "http://umlsks.nlm.nih.gov"
示例#9
0
    def start(self):
        """Open the configured database, or move on if the workflow has one.

        When the workflow already carries a ``dao`` the next step is started
        directly. Otherwise the database location is read from the
        configuration; an existing file leads to the overwrite/import options
        being shown, a missing one to table creation and the next step.
        """
        has_dao = hasattr(self.workflow, 'dao') and self.workflow.dao is not None
        if has_dao:
            self.next_step.start()
            return

        print(self.workflow.config_file)
        self.dbpath = ConfigReader(self.workflow.config_file).getValue('db_path')
        header = ConfigReader(self.workflow.config_file).getValue('db_header')
        self.db_config = header + self.dbpath

        # Check for the file before the DAO is created, as the original did.
        db_file_exists = os.path.isfile(self.dbpath)
        self.initDao(self.db_config)
        if db_file_exists:
            self.displayOptions()
        else:
            self.createSQLTables()
            self.need_import = True
            self.next_step.start()
示例#10
0
 def saveGloveConfig(self):
     """Copy the GloVe input widget values onto the step and into the config.

     ``glove/vocab`` and ``glove/vector`` are stored as integers; the
     configuration is saved afterwards.
     """
     self.glove_path = self.glove_path_input.value
     self.glove_vocab = self.glove_vocab_input.value
     self.glove_vector = self.glove_vector_input.value
     self.workflow.glove_path = self.glove_path

     vocab_size = int(self.glove_vocab)
     ConfigReader.setValue("glove/vocab", vocab_size)
     vector_size = int(self.glove_vector)
     ConfigReader.setValue("glove/vector", vector_size)
     ConfigReader.setValue("glove/model_path", self.glove_path)
     ConfigReader.saveConfig()
示例#11
0
    def __init__(self, description='', name=None):
        """Read the GloVe settings and prepare the (initially empty) inputs.

        Args:
            description: HTML shown by the ``html`` widget.
            name: Step name forwarded to the parent constructor.
        """
        self.glove_path = ConfigReader.getValue('glove/model_path')
        configured_vocab = ConfigReader.getValue('glove/vocab')
        configured_vector = ConfigReader.getValue('glove/vector')
        # Fall back to built-in defaults when the settings are absent.
        self.glove_vocab = 1900000 if configured_vocab is None else configured_vocab
        self.glove_vector = 300 if configured_vector is None else configured_vector

        # widgets to take the user inputs
        self.glove_path_input = None
        self.glove_vocab_input = None
        self.glove_vector_input = None
        self.api_key_input = None

        self.html = widgets.HTML(value=description)
        super().__init__(name)
    def complete(self):
        """Collect the selected words and choose where the workflow goes next.

        With at least one word selected, the selection (and, if it was not
        known yet, the API key from the input widget) is stored on the
        workflow. Without any selection the step at ``pos_id + 2`` becomes
        the next step. The parent ``complete`` runs in both cases.
        """
        for type_name, toggle in self.to_umls_ext_filters.items():
            self.to_ext_words[type_name] = TreeSet(toggle.value)

        any_word_selected = any(
            len(self.to_ext_words[type_name]) > 0
            for type_name in self.to_umls_ext_filters)

        if any_word_selected:
            self.workflow.to_ext_words = self.to_ext_words
            if self.api_key is None:
                self.api_key = self.api_input.value
                self.workflow.api_key = self.api_key
                ConfigReader.setValue("api_key", self.api_key)
                ConfigReader.saveConfig()
        else:
            self.setNextStep(self.workflow.steps[self.pos_id + 2])
            self.workflow.steps[self.pos_id + 2].setPreviousStep(self)
        super().complete()
 def __init__(
         self,
         description='<h4>Extend keywords through <b>UMLS</b></h4><p>Please select which keywords you want to '
                     'check the synonyms from UMLS:',
         name=None):
     """Prepare the title widget and the empty selection state.

     Args:
         description: HTML used for the title widget.
         name: Step name forwarded to the parent constructor.
     """
     self.api_key = ConfigReader.getValue('api_key')
     self.title = widgets.HTML(value=description)
     # Filled in later: selected words and their toggle widgets.
     self.to_ext_words = {}
     self.to_umls_ext_filters = {}
     self.api_input = None
     super().__init__(name)
示例#14
0
    def __init__(self, name=str(Step.global_id + 1), **kwargs):
        super().__init__([], name=name)
        self.docs = []
        self.data = dict()
        self.annos = dict()
        self.reviewed_docs = dict()
        self.threshold = ConfigReader.getValue('review/rb_model_threshold')
        self.nlp = None
        self.js = '''<script>
function setFocusToTextBox(){
    var spans = document.getElementsByClassName("highlighter");
    var id=document.getElementById('d1').pos
    if (id===undefined){
      id=0
    }          
    if (id>=spans.length){
        id=0
    }
    var topPos = spans[id].offsetTop;    
    dv=document.getElementById('d1')
    dv.scrollTop = topPos-20;
    dv.pos=id+1;
}
</script>'''
        self.end_js = '''<script>document.getElementById('d1').pos=0;topPos=0;</script>'''
        self.matcher = None
        self.metaColumns = ConfigReader().getValue("review/meta_columns")
        self.div_height = ConfigReader().getValue("review/div_height")
        logMsg(('self.div_height:', self.div_height))
        self.show_meta_name = ConfigReader().getValue("review/show_meta_name")
        self.hightligh_span_tag = ' <span class="highlighter" style="background-color:  %s ">' % ConfigReader(
        ).getValue("review/highlight_color")
        if 'rush_rule' in kwargs:
            self.rush_rule = kwargs['rush_rule']
        else:
            self.rush_rule = ConfigReader.getValue('rush_rules_path')

        pass
示例#15
0
 def requestUMLSAPIKey(self, rows):
     """Append a UMLS API-key prompt to ``rows`` when no key is configured.

     Nothing is added when the ``api_key`` configuration value is already
     set. Otherwise an explanatory HTML widget, a text input (kept as
     ``self.api_key_input``) and a separator are appended.

     Args:
         rows: List of widgets that is extended in place.
     """
     api_key = ConfigReader.getValue("api_key")
     if api_key is None or len(api_key) == 0:
         # The prompt used to show the GloVe-model text, although the value
         # entered here is handled as the UMLS API key.
         rows.append(
             widgets.HTML(
                 value='<h4>Set your API Key</h4><p>In order to use the UMLS synonym checking module, you need to set'
                       ' up your API key: (<a href="https://www.nlm.nih.gov/research/umls/user_education/quick_tours/'
                       'UTS-API/UTS_REST_API_Authentication.html" target="_blank">How to get your API Key_at 01:12 from'
                       ' beginning. </a>)</p><p>If you do not set the api key, the UMLS synonym extender will be '
                       '<b>skipped</b>.</p>'))
         self.api_key_input = widgets.Text(value='',
                                           placeholder='',
                                           description='',
                                           disabled=False)
         rows.append(self.api_key_input)
         rows += self.addSeparator()
示例#16
0
 def requestUMLSAPIKey(self, rows):
     """Extend ``rows`` with an API-key prompt if no key is configured yet.

     Args:
         rows: List of widgets that is extended in place with an HTML
             explanation, the text input stored as ``self.api_key_input``
             and a separator.
     """
     configured_key = ConfigReader.getValue("api_key")
     if configured_key is not None and len(configured_key) != 0:
         return

     prompt_html = (
         '<h4>Set your API Key</h4><p>In order to use the UMLS synonym checking module, you need to set'
         ' up your API key: (<a href="https://www.nlm.nih.gov/research/umls/user_education/quick_tours/'
         'UTS-API/UTS_REST_API_Authentication.html" target="_blank">How to get your API Key_at 01:12 from'
         ' beginning. </a>)</p><p>If you do not set the api key, the UMLS synonym extender will be '
         '<b>skipped</b>.</p>')
     rows.append(widgets.HTML(value=prompt_html))
     self.api_key_input = widgets.Text(value='', placeholder='', description='', disabled=False)
     rows.append(self.api_key_input)
     rows += self.addSeparator()
 def __init__(self, **kwargs):
     """Initialise the sampler state from keyword arguments.

     Args:
         **kwargs: ``previous_sampled_ids`` and ``dao`` are required;
             ``dataset_id`` is optional and defaults to ``'origin_doc'``.

     Raises:
         KeyError: If ``previous_sampled_ids`` or ``dao`` is missing.
     """
     self.sample_size = 0
     self.previous_sampled_ids = kwargs['previous_sampled_ids']
     self.dao = kwargs['dao']
     self.dataset_id = kwargs.get('dataset_id', 'origin_doc')
     self.ignore_case = True
     self.whoosh_root = ConfigReader.getValue('whoosh/root_path')

     # Bookkeeping that starts out empty.
     self.grouped_ids = {}
     self.all_contain_ids = set()
     self.available_not_contain = 0
     self.new_available_not_contain = 0
     self.new_ids = {}
     self.current_stats = {}
示例#18
0
 def __init__(self,
              description='',
              name=str(Step.global_id + 1),
              sampler_cls: type = KeywordStratefiedSampler):
     """Create the sampling widgets and reset the sampling state.

     Args:
         description: Accepted for interface compatibility; not used here.
         name: Step name forwarded to the parent constructor.
         sampler_cls: Sampler class stored on the instance.
     """
     super().__init__(name=name)
     wide_label = {'description_width': 'initial'}

     self.toggle = widgets.ToggleButtons(
         options=sample_options,
         value=sample_options[-1],
         description='What to do with previously sampled data? ',
         style=dict(wide_label),
         button_style='info')
     self.toggle.observe(self.onPreviousSampleHandleChange)

     self.sample_size_input = widgets.BoundedIntText(
         value=0,
         min=0,
         max=0,
         step=1,
         description='Total documents you want to sample:',
         style=dict(wide_label))
     self.sample_size_input.observe(self.onSampleConfigChange)

     self.sampler_cls = sampler_cls
     self.sampled_summary = widgets.HTML(value='')

     self.percent_slider = widgets.IntSlider(
         value=70,
         min=0,
         max=100,
         step=5,
         description='',
         disabled=False,
         continuous_update=False,
         orientation='horizontal',
         readout=True,
         readout_format='d')
     self.percent_slider.observe(self.onSampleConfigChange)

     # save DOC_IDs that contain or not contain keywords filters (used in sampling strategy)
     # NOTE: this value is replaced by an empty dict further down.
     self.samples = {"contain": [], "notcontain": []}
     self.box = None
     self.data = {'docs': [], 'annos': OrderedDict()}
     self.ready = False
     # reset, continue, addmore,
     self.move_next_option = ''
     self.total = None
     self.total_contains = None
     self.un_reviewed = 0
     self.sampler = None
     self.samples = {}
     self.current_stats = {}
     self.max_threshold = ConfigReader.getValue("review/rb_model_threshold")
     self.sample_sizes = {}
示例#19
0
 def navigate(self, button):
     """Persist pending inputs, then start (and optionally complete) step 1.

     Args:
         button: The clicked button; a description of
             ``'ContinueReviewing'`` means the previous review is resumed.
     """
     if self.glove_path_input is not None:
         self.saveGloveConfig()

     if self.api_key_input is None:
         self.workflow.api_key = ConfigReader.getValue("api_key")
     else:
         self.saveAPIKey()

     self.backgroundWork()

     resume = button.description == 'ContinueReviewing'
     self.workflow.to_continue = resume
     self.workflow.steps[1].start()
     if resume:
         self.workflow.steps[1].complete()
示例#20
0
def evaluate(task_name='language', classifiers=[LogisticBOWClassifiers]):
    """Run the sampling workflow non-interactively and train each classifier.

    Args:
        task_name: Task name set on the workflow and passed to classifiers.
        classifiers: Classifier classes to instantiate and train.
    """
    ConfigReader()

    db_step = DBInitiater(name='db_initiator')
    # Constructed as in the original script, but not added to the workflow.
    task_step = TaskChooser(name='tasknamer')
    dataset_step = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    types_step = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
        'for each sample. </p>',
        name='types')
    keywords_step = KeywordsFiltering(name='keywords')
    review_init = ReviewRBInit(name="rb_review_init")

    pipeline = [db_step, dataset_step, types_step, keywords_step, review_init]
    workflow = Workflow(pipeline)
    workflow.task_name = task_name
    workflow.start()
    for step in pipeline:
        step.complete()

    for key, value in workflow.samples.items():
        print(key, len(value))
    docs = workflow.samples['docs']
    annos = workflow.samples['annos']

    # Keep only the annotations that already carry a reviewed type.
    reviewed_docs = {}
    for doc_id, anno in annos.items():
        if anno.REVIEWED_TYPE is not None:
            reviewed_docs[doc_id] = anno.REVIEWED_TYPE

    texts = [doc.TEXT for doc in docs[:len(reviewed_docs)]]
    labels = list(reviewed_docs.values())
    print(labels)
    logging.getLogger().setLevel(logging.DEBUG)
    for classifier_cls in classifiers:
        classifier = classifier_cls(task_name=task_name)
        print("\n\nReport performance of {}:".format(classifier_cls.__name__))
        classifier.train(texts, labels)
示例#21
0
 def start(self):
     """Assemble the full SmartAnno workflow and start it.

     The steps are created in display order; the UMLS extender takes its
     options from the ``umls/*`` configuration values.
     """
     config = ConfigReader()
     steps = [
         IntroStep(
             '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>',
             name='intro'),
         DBInitiater(name='db_initiator'),
         DirChooser(name='choosedir'),
         ReadFiles(name='readfiles'),
         DocsToDB(name='save2db'),
         TaskChooser(name='tasknamer'),
         DataSetChooser(
             name='dataset_chooser',
             description='<h4>Choose which dateaset you want to use: </h4>'),
         AnnotationTypeDef(
             '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
             'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
             'for each sample. </p>',
             name='types'),
         KeywordsFiltering(name='keywords'),
         KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
         KeywordsUMLSExtender(
             name='umls_extender',
             sources=config.getValue("umls/sources"),
             filter_by_length=config.getValue("umls/filter_by_length"),
             filter_by_contains=config.getValue("umls/filter_by_contains"),
             max_query=config.getValue("umls/max_query")),
         KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
         KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
         ReviewRBInit(name="rb_review_init"),
         ReviewRBLoop(name='rb_review'),
         PreviousNextHTML(
             description='<h2>Congratuations!</h2><h4>You have finished the initial review '
                         'on the rule-base preannotations. </h4>',
             name='rb_review_done'),
         ReviewMLInit(name='ml_review_init'),
         ReviewMLLoop(name='ml_review',
                      ml_classifier_cls=self.ml_classifier_cls),
         PreviousNextHTML(
             name='finish',
             description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. '),
     ]
     self.workflow = Workflow(steps)
     self.workflow.start(False)
示例#22
0
    def init_real_time(self):
        """Prepare the classifier and build one review step per document.

        Review steps are only created when there are documents and the loop
        workflow has no steps yet. Documents that already carry a reviewed
        type reuse it; the others get a prediction from ``getPrediction``.
        """
        self.ml_classifier = self.ml_classifier_cls(task_name=self.workflow.task_name)
        self.learning_pace = ConfigReader.getValue("review/ml_learning_pace")
        self.loop_workflow.filters = self.workflow.filters
        self.readData()
        if self.ml_classifier_cls.status == NotTrained:
            self.backgroundTraining()

        self.nlp = ReviewRBInit.nlp
        self.matcher = ReviewRBInit.matcher

        logMsg([doc.DOC_ID for doc in self.docs])
        if self.docs is None or len(self.docs) == 0:
            return
        if self.loop_workflow is not None and len(self.loop_workflow.steps) != 0:
            return

        # Include the first unreviewed document, if there is one.
        reviewed_count = len(self.reviewed_docs)
        if reviewed_count < len(self.docs):
            last_doc_pos = reviewed_count + 1
        else:
            last_doc_pos = reviewed_count

        for position in range(last_doc_pos):
            doc = self.docs[position]
            content = self.genContent(doc)
            reviewed = (doc.DOC_ID in self.annos
                        and self.annos[doc.DOC_ID].REVIEWED_TYPE is not None)
            if reviewed:
                prediction = self.annos[doc.DOC_ID].REVIEWED_TYPE
            else:
                prediction = self.getPrediction(doc)
            style = 'success' if reviewed else 'info'
            self.appendRepeatStep(
                ReviewML(description=content,
                         options=self.workflow.types,
                         value=prediction,
                         js=self.js,
                         master=self,
                         reviewed=reviewed,
                         button_style=style))
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef
from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.utils.ReviewRBInit import ReviewRBInit
from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop
from SmartAnno.utils.ReviewMLInit import ReviewMLInit
from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop
from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier
from SmartAnno.utils.DataSetChooser import DataSetChooser

# Test-script setup: log at DEBUG level on the root logger.
logging.getLogger().setLevel(logging.DEBUG)

# Load the project configuration from the path relative to this script.
ConfigReader('../conf/smartanno_conf.json')

# Build a workflow by hand, pre-populating the attributes the steps read
# (API key, DAO on the test SQLite file, task name).
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite',
             sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'
# Step: annotation type definition.
wf.append(
    AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
        'for each sample. </p>',
        name='types'))
# Step: keyword filtering.
wf.append(KeywordsFiltering(name='keywords'))
wf.append(
    DataSetChooser(
示例#24
0
 def restoreStatus(self):
     """Return the saved status for this object, or ``0`` when none is stored.

     The value is read from the ``status/<name>`` configuration key; a
     missing (``None``) or empty-string value counts as not stored.
     """
     saved = ConfigReader.getValue('status/' + self.name)
     return 0 if saved is None or saved == '' else saved
示例#25
0
 def updateStatus(self, status=None):
     """Optionally replace the current status, then persist it.

     Args:
         status: New status; when ``None`` the existing ``self.status`` is
             saved unchanged.
     """
     if status is not None:
         self.status = status
     status_key = 'status/' + self.name
     ConfigReader.saveStatus(self.status, status_key=status_key)
示例#26
0
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.umls.UMLSFinder import UMLSFinder
# Load the default configuration, then query UMLS for a sample term.
ConfigReader()
finder_options = {
    'sources': [],
    'filter_by_length': 5,
    'max_query': 50,
    'filter_by_contains': True,
}
umls = UMLSFinder(ConfigReader.getValue("api_key"), **finder_options)
print(umls.search("ketoacidosis"))
示例#27
0
# ConfigReader(config_file='../conf/smartanno_conf2.json').saveStatus(0)


from SmartAnno.utils.ConfigReader import ConfigReader
# Load the backup copy of the configuration file.
cr = ConfigReader(config_file='conf/smartanno_conf.json.bk')

示例#28
0
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.IntroStep import IntroStep
# Minimal two-step workflow: an intro step followed by a closing page.
ConfigReader('../conf/smartanno_conf2.json')
intro = IntroStep(
    '<h2>Welcome to SmartAnno!</h2><h4>First, let&apos;s import txt data from a directory. </h4>',
    name='intro')
finish_page = PreviousNextHTML(
    name='finish',
    description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. ')
wf = Workflow([intro, finish_page])
wf.start()
# Simulate a click on the first branch button of the intro step.
intro.navigate(intro.branch_buttons[0])
示例#29
0
class DBInitiater(PreviousNext):
    """Workflow step that opens, or creates, the database the workflow uses.

    If the configured database file already exists the user is asked whether
    to overwrite it and whether to import new data; otherwise the tables are
    created and the workflow moves on.
    """

    def __init__(self, name=None):
        """Initialise the step with no database connection yet."""
        super().__init__(name)
        self.dao = None
        self.db_config = ''
        self.dbpath = ''
        self.need_import = False
        self.overwrite = False

    def start(self):
        """Connect to the configured database unless the workflow has a DAO."""
        if not hasattr(self.workflow, 'dao') or self.workflow.dao is None:
            print(self.workflow.config_file)
            self.dbpath = ConfigReader(self.workflow.config_file).getValue('db_path')
            self.db_config = ConfigReader(self.workflow.config_file).getValue('db_header') + self.dbpath

            if os.path.isfile(self.dbpath):
                self.initDao(self.db_config)
                self.displayOptions()
            else:
                # No database file yet: create the schema and flag that
                # data has to be imported.
                self.initDao(self.db_config)
                self.createSQLTables()
                self.need_import = True
                self.next_step.start()
        else:
            self.next_step.start()

    def backStart(self):
        """Drop the workflow's DAO and run ``start`` again."""
        self.workflow.dao = None
        self.start()

    def updateBox(self):
        """Build the vertical box holding both questions and the navigation."""
        rows = [self.html, self.toggle] + self.addSeparator(top='10px') + \
               [self.html2, self.toggle2] + self.addSeparator(top='10px') + [
                   self.addPreviousNext(self.show_previous, self.show_next)]
        vbox = widgets.VBox(rows)
        # ``flex_grown`` is not a Layout trait; ``flex_flow`` is the one that
        # sets the column direction.
        vbox.layout.flex_flow = 'column'
        return vbox

    def initDao(self, dbfile):
        """Create the DAO for ``dbfile`` and publish it on the workflow.

        Args:
            dbfile: Database URL; the part after ``':///'`` is stored as the
                workflow's ``dbpath``.
        """
        # Use the argument instead of silently falling back to
        # ``self.db_config``; existing callers pass that same value.
        self.dao = Dao(dbfile, sqlalchemy_dao.POOL_DISABLED)
        self.workflow.dao = self.dao
        self.workflow.dbpath = dbfile[dbfile.find(':///') + 4:]

    def displayOptions(self):
        """Show the overwrite and import questions for an existing database."""
        clear_output()
        self.html = widgets.HTML(
            '<h4>Sqlite database "%s" exists, do you want to overwrite?</h4>'
            '<h4>choose <b>yes</b> will <span style="color:red"><b>clear all the data</b></span> in that database file</h4>' % self.dbpath)
        self.toggle = widgets.ToggleButtons(
            options=['Yes', 'No'],
            description='',
            disabled=False,
            value='No',
            button_style='',  # 'success', 'info', 'warning', 'danger' or ''
            tooltips=['Replace the old database', 'Append data to the old database'],
            layout=widgets.Layout(width='70%')
        )
        self.toggle.observe(self.on_click, 'value')
        self.html2 = widgets.HTML(
            '<h4>Do you want to import new data?</h4>')
        self.toggle2 = widgets.ToggleButtons(
            options=['Yes', 'No'],
            description='',
            disabled=False,
            value='No',
            button_style='',  # 'success', 'info', 'warning', 'danger' or ''
            tooltips=['Add new data to db', 'Use existing data in db'],
            layout=widgets.Layout(width='70%')
        )

        self.resetParameters()
        self.box = self.updateBox()
        display(self.box)

    def on_click(self, change):
        """Choosing to overwrite also switches the import toggle to 'Yes'."""
        self.data = change['new']
        if self.data == 'Yes':
            self.toggle2.value = 'Yes'

    def complete(self):
        """Apply the user's choices and start the next step.

        Raises:
            TypeError: If ``next_step`` is set but is not a ``Step``.
        """
        clear_output(True)
        if self.toggle.value == 'Yes':
            # Overwrite: delete the file, then recreate the schema.
            os.remove(self.dbpath)
            self.dao = Dao(self.db_config, sqlalchemy_dao.POOL_DISABLED)
            self.createSQLTables()
            self.overwrite = True
            self.need_import = True
        else:
            self.dao = Dao(self.db_config, sqlalchemy_dao.POOL_DISABLED)
            if self.toggle2.value == 'Yes':
                self.need_import = True
            self.overwrite = False
        self.workflow.dao = self.dao
        if self.next_step is not None:
            if isinstance(self.next_step, Step):
                if self.workflow is not None:
                    self.workflow.updateStatus(self.next_step.pos_id)
                self.next_step.start()
            else:
                raise TypeError(
                    'Type error for ' + self.name + '\'s next_step. Only Step can be the next_step, where its next_step is ' + str(
                        type(self.next_step)))
        else:
            print("next step hasn't been set.")

    def createSQLTables(self):
        """Create all ORM tables on the DAO's engine."""
        Model.metadata.create_all(bind=self.dao._engine)
示例#30
0
import sqlalchemy_dao
from sqlalchemy_dao import Dao

from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.db.ORMs import Filter
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef
from SmartAnno.utils.IntroStep import IntroStep
from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering
from SmartAnno.utils.KeywordsUMLSExtender import KeywordsUMLSExtender
from SmartAnno.utils.KeywordsUMLSExtenderSetup import KeywordsUMLSExtenderSetup

# Test-script setup: log at DEBUG level on the root logger.
logging.getLogger().setLevel(logging.DEBUG)

# Load the project configuration from the path relative to this script.
ConfigReader('../conf/smartanno_conf.json')

# Build a workflow by hand, pre-populating the attributes the steps read
# (API key, DAO on the test SQLite file, task name).
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite', sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'
# Step: annotation type definition.
wf.append(AnnotationTypeDef(
    '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
    'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
    'for each sample. </p>', name='types'))
# Step: keyword filtering.
wf.append(KeywordsFiltering(
    name='keywords'))
# Step: UMLS extender setup (asks for the settings the extender needs).
wf.append(KeywordsUMLSExtenderSetup(name='umls_extender_setup'))
wf.append(KeywordsUMLSExtender(name='umls_extender', sources=ConfigReader.getValue("umls/sources"),
                               filter_by_length=ConfigReader.getValue("umls/filter_by_length"),
                               filter_by_contains=ConfigReader.getValue("umls/filter_by_contains"),