Exemplo n.º 1
0
def evaluate(task_name='language', classifiers=None):
    """Run the rule-based review workflow and train classifiers on its output.

    The function builds a ``Workflow`` from a fixed list of steps, starts it,
    marks every step as complete, collects the reviewed annotations from
    ``wf.samples`` and calls ``train(x, y)`` on one instance of each
    classifier class.

    Args:
        task_name: Assigned to ``wf.task_name`` and passed as ``task_name``
            to every classifier constructor.
        classifiers: Iterable of classifier classes. Each one is instantiated
            as ``cls(task_name=task_name)`` and must expose ``train(x, y)``.
            ``None`` (the default) means ``[LogisticBOWClassifiers]``.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Resolve the default per call instead of sharing one list object
    # between all calls (mutable-default-argument pitfall).
    if classifiers is None:
        classifiers = [LogisticBOWClassifiers]

    ConfigReader()

    dbi = DBInitiater(name='db_initiator')
    # NOTE(review): this step is constructed but never added to the workflow
    # below. Kept in case the constructor has side effects -- confirm, then
    # either add it to the step list or remove it.
    tc = TaskChooser(name='tasknamer')
    dsc = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    anno_type = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
        'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
        'for each sample. </p>',
        name='types')
    kf = KeywordsFiltering(name='keywords')
    ri = ReviewRBInit(name="rb_review_init")

    # Only the rule-based steps are wired in; the ML review steps
    # (ReviewMLInit / ReviewMLLoop) are intentionally left out here.
    steps = [dbi, dsc, anno_type, kf, ri]
    wf = Workflow(steps)
    wf.task_name = task_name
    wf.start()
    # Complete the steps in the same order in which they were registered.
    for step in steps:
        step.complete()

    for key, value in wf.samples.items():
        print(key, len(value))
    docs = wf.samples['docs']
    annos = wf.samples['annos']
    # Keep only the annotations that actually carry a reviewed label.
    reviewed_docs = {
        doc_id: anno.REVIEWED_TYPE
        for doc_id, anno in annos.items() if anno.REVIEWED_TYPE is not None
    }
    # NOTE(review): x is the text of the *first* len(reviewed_docs) documents,
    # while y comes from whichever annotations have a reviewed type. This
    # presumably relies on reviews being done in document order -- verify
    # that x[i] and y[i] really refer to the same document.
    x = [doc.TEXT for doc in docs[:len(reviewed_docs)]]
    y = list(reviewed_docs.values())
    print(y)
    logging.getLogger().setLevel(logging.DEBUG)
    for cl in classifiers:
        cl_instance = cl(task_name=task_name)
        print("\n\nReport performance of {}:".format(cl.__name__))
        cl_instance.train(x, y)
from SmartAnno.utils.ReviewRBInit import ReviewRBInit
from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop
from SmartAnno.utils.ReviewMLInit import ReviewMLInit
from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop
from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier
from SmartAnno.utils.DataSetChooser import DataSetChooser

# Script set-up: verbose logging first, then configuration, then the workflow.
logging.getLogger().setLevel(logging.DEBUG)

# The configuration must be loaded before the workflow reads from it.
ConfigReader('../conf/smartanno_conf.json')

wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao(
    'sqlite+pysqlite:///../data/test.sqlite',
    sqlalchemy_dao.POOL_DISABLED,
)
wf.task_name = 'language'

# Steps are appended one by one in this order:
# annotation types -> keywords -> dataset chooser -> review init -> review loop.

# NOTE(review): the adjacent literals below join as "If youhave ..." (no
# space between the first two pieces); left untouched here on purpose.
_types_description = (
    '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you'
    'have too many types, try set up them separately, so that you won&apos;t need to choose from a long list '
    'for each sample. </p>'
)
_types_step = AnnotationTypeDef(_types_description, name='types')
wf.append(_types_step)

_keywords_step = KeywordsFiltering(name='keywords')
wf.append(_keywords_step)

_dataset_step = DataSetChooser(
    name='dataset_chooser',
    description='<h4>Choose which dateaset you want to use: </h4>',
)
wf.append(_dataset_step)

# The two review steps keep their module-level names (rb, rv).
rb = ReviewRBInit(name="rb_review_init")
wf.append(rb)

rv = ReviewRBLoop(name='rb_review', rush_rule='../conf/rush_rules.tsv')
wf.append(rv)