Exemplo n.º 1
0
 def remove(self):
     """Delete this classifier and the rows that reference it.

     Rows in CLASSIFIER_META and CLASSIFIER_RESOURCE carrying this
     classifier's uid are deleted first, then the CLASSIFIER row itself.
     The three deletes share a single commit; if any statement (or the
     commit) fails, the transaction is rolled back and the original error
     is re-raised unchanged.
     """
     db = persistence.db()
     c = db.cursor()
     try:
         # Dependent tables first, the classifier row last.
         for statement in (
                 'DELETE FROM CLASSIFIER_META WHERE classifier_uid=?',
                 'DELETE FROM CLASSIFIER_RESOURCE WHERE classifier_uid=?',
                 'DELETE FROM CLASSIFIER WHERE uid=?'):
             c.execute(statement, [self.uid])
         db.commit()
     except BaseException:
         # Never leave a half-deleted classifier pending on the connection.
         db.rollback()
         raise
     logging.info('Removed classifier %s', self.uid)
Exemplo n.º 2
0
 def save(self):
     """Insert this classifier, its resource links and its metadata.

     Returns immediately when ``self.saved`` is already true. Otherwise
     one CLASSIFIER row, one CLASSIFIER_RESOURCE row per entry of
     ``self.resources`` and one CLASSIFIER_META row per entry of
     ``self.meta`` are inserted and committed together. On success
     ``self.saved`` is set to True.

     Raises:
         Exception: if a classifier with this uid already exists, if a
             referenced resource does not exist locally, or if any of the
             inserts fails (the transaction is rolled back in that case).
     """
     if self.saved:
         return
     if Classifier.exists(self.uid):
         raise Exception('Classifier uid %s exists already' % self.uid)
     # check existence of resources
     for resource_uid in self.resources.values():
         if not Resource.exists(resource_uid):
             raise Exception(
                 'Required resource %s of Classifier %s does not exist locally'
                 % (resource_uid, self.uid))
     # Only touch the database once all preconditions hold.
     db = persistence.db()
     c = db.cursor()
     # insert into DB
     try:
         c.execute(
             """INSERT INTO CLASSIFIER (
         uid,
         type,
         title,
         enabled,
         language,
         test_accuracy,
         training_set_size,
         created_on,
         finished_on,
         local_created_on,
         state) 
         VALUES (?,?,?,?,?,?,?,?,?,?,?)""", [
                 self.uid, self.model_type, self.title, self.enabled,
                 self.language, self.test_accuracy, self.training_set_size,
                 self.created_on, self.finished_on, self.local_created_on,
                 self.state
             ])
         for key, resource_uid in self.resources.items():
             c.execute(
                 """INSERT INTO CLASSIFIER_RESOURCE (
               classifier_uid,
               key,
               resource_uid)
               VALUES (?,?,?)""", [self.uid, key, resource_uid])
         for key, value in self.meta.items():
             c.execute(
                 """INSERT INTO CLASSIFIER_META (
               classifier_uid,
               key,
               value)
               VALUES (?,?,?)""", [self.uid, key, value])
         db.commit()
     except Exception:
         # Record the underlying error before it is replaced below.
         logging.exception('Insert of classifier %s failed', self.uid)
         db.rollback()
         raise Exception('Failed to insert classifier %s into DB' %
                         self.uid)
     except BaseException:
         # KeyboardInterrupt / SystemExit: undo the partial insert but let
         # the interruption propagate as-is.
         db.rollback()
         raise
     self.saved = True
     logging.info('Inserted new classifier %s', self.uid)
Exemplo n.º 3
0
 def set_enabled(self, enabled=True):
     """Persist the ``enabled`` flag of this classifier.

     The CLASSIFIER row with this uid is updated and committed first;
     ``self.enabled`` is only assigned once the commit succeeded, so a
     failed update leaves the in-memory object unchanged.

     Args:
         enabled: new value of the flag (defaults to True).

     Raises:
         Exception: if the update or commit fails (the transaction is
             rolled back).
     """
     db = persistence.db()
     c = db.cursor()
     try:
         c.execute("UPDATE CLASSIFIER SET enabled=? WHERE uid=?",
                   [enabled, self.uid])
         db.commit()
     except Exception:
         # Record the underlying error before it is replaced below.
         logging.exception('Update of classifier %s failed', self.uid)
         db.rollback()
         raise Exception('Failed to update classifier %s as enabled=%s' %
                         (self.uid, enabled))
     except BaseException:
         # KeyboardInterrupt / SystemExit: roll back and propagate as-is.
         db.rollback()
         raise
     self.enabled = enabled
     logging.info('Set classifier enabled=%s %s', enabled, self.uid)
Exemplo n.º 4
0
 def create(cls, classifier_uid):
     """Create, save and return a new JobContext for a classifier.

     A fresh uid is generated with ``uuid.uuid1()``. The job directory
     name is ``<YYYYmmddHHMMSS>-<uid>``; the same timestamp is used for
     both the directory name and ``created_on`` so the two always agree.

     Args:
         classifier_uid: uid of the classifier the job belongs to.

     Returns:
         The newly created JobContext, after ``save()`` has been called.
     """
     # generate uid
     uid = str(uuid.uuid1())
     now = datetime.datetime.now()
     dir_name = '%s-%s' % (now.strftime('%Y%m%d%H%M%S'), uid)
     # create and fill object
     job = JobContext(uid=uid,
                      dir_name=dir_name,
                      created_on=str(now),
                      classifier_uid=classifier_uid)
     job.save()
     job.logger.info('Created job %s' % uid)
     return job
Exemplo n.º 5
0
 def remove(self):
     """Delete this resource's files and its RESOURCE row.

     Removal is refused while any CLASSIFIER_RESOURCE row still references
     this resource. Deleting the directory under ``config.RESOURCES_PATH``
     is best effort: a failure is logged as a warning and the database row
     is removed regardless.

     Raises:
         Exception: if a CLASSIFIER_RESOURCE row references this resource.
         Any error raised by the DELETE or commit is re-raised after the
         transaction has been rolled back.
     """
     db = persistence.db()
     c = db.cursor()
     c.execute("""SELECT * FROM CLASSIFIER_RESOURCE WHERE resource_uid=?""",
               [self.uid])
     if c.fetchone() is not None:
         raise Exception(
             'Cannot delete resource %s because of existing dependencies' %
             (self.uid))
     try:
         shutil.rmtree(os.path.join(config.RESOURCES_PATH, self.path))
     except Exception:
         # Best effort: missing or undeletable files must not block the
         # removal of the database row, but should not go unnoticed.
         logging.warning('Could not remove files of resource %s',
                         self.uid, exc_info=True)
     try:
         c.execute('DELETE FROM RESOURCE WHERE UID=?', [self.uid])
         db.commit()
     except BaseException:
         db.rollback()
         raise
     logging.info('Removed resource %s', self.uid)
Exemplo n.º 6
0
 def save(self):
     """Insert this job into the JOB table, or update it if it exists.

     When ``JobContext.exists(self.uid)`` is true, only ``status``,
     ``progress_percentage`` and ``progress_text`` are updated; otherwise
     a full row is inserted. Either statement is committed on its own.

     Raises:
         Exception: if the statement or the commit fails (the transaction
             is rolled back).
     """
     db = persistence.db()
     c = db.cursor()
     if JobContext.exists(self.uid):
         statement = """UPDATE JOB SET
            status=?,
            progress_percentage=?,
            progress_text=?
            WHERE uid=?"""
         params = [
             self.status, self.progress_percentage, self.progress_text,
             self.uid
         ]
         failure = 'Failed to update job status of %s' % self.uid
     else:
         # insert into DB
         statement = """INSERT INTO JOB (
            uid,
            dir_name,
            created_on,
            classifier_uid,
            status,
            progress_percentage,
            progress_text)
            VALUES (?,?,?,?,?,?,?)"""
         params = [
             self.uid, self.dir_name, self.created_on, self.classifier_uid,
             self.status, self.progress_percentage, self.progress_text
         ]
         failure = 'Failed to insert job %s' % self.uid
     try:
         c.execute(statement, params)
         db.commit()
     except Exception:
         db.rollback()
         raise Exception(failure)
     except BaseException:
         # KeyboardInterrupt / SystemExit: roll back and propagate as-is.
         db.rollback()
         raise
Exemplo n.º 7
0
 def train(self, job, in_csv, in_desc_col, in_res_col, in_class_col):
     """Mark this classifier as training, then as ready.

     The classifier is saved first if it has not been saved yet. Its state
     is set to 'Training' and persisted, and afterwards set to 'Ready'
     together with ``finished_on`` and persisted again. The actual
     training step between the two updates is still a TODO, so ``job``,
     ``in_csv``, ``in_desc_col``, ``in_res_col`` and ``in_class_col`` are
     currently unused.

     Raises:
         Exception: if the classifier is already trained, or if either
             state update fails (that update is rolled back).
     """
     db = persistence.db()
     c = db.cursor()

     def _persist(statement, params, failure):
         # Run one UPDATE in its own commit; roll back and raise on error.
         try:
             c.execute(statement, params)
             db.commit()
         except Exception:
             # Record the underlying error before it is replaced below.
             logging.exception(failure)
             db.rollback()
             raise Exception(failure)
         except BaseException:
             # KeyboardInterrupt / SystemExit: roll back, propagate as-is.
             db.rollback()
             raise

     if not self.saved:
         self.save()
     if self.trained():
         raise Exception('Model %s is already trained' % self.uid)
     # set training start
     logging.info('Starting to train classifier %s', self.uid)
     self.state = 'Training'
     _persist("UPDATE CLASSIFIER SET state=? WHERE uid=?",
              [self.state, self.uid],
              'Failed to update classifier %s as training started' %
              self.uid)
     # run training
     # TODO
     # modelregistry.train(type, meta, resources, in_csv, in_text_col, in_class_col)
     # (...............)
     # set training end
     self.state = 'Ready'
     self.finished_on = str(datetime.datetime.now())
     _persist("UPDATE CLASSIFIER SET finished_on=?, state=? WHERE uid=?",
              [self.finished_on, self.state, self.uid],
              'Failed to update classifier %s as training finished' %
              self.uid)
     logging.info('Finished training classifier %s', self.uid)
Exemplo n.º 8
0
 def add(cls, uid, resource_type, title, created_on, local_created_on,
         path):
     """Register a new resource in the RESOURCE table and return it.

     Args:
         uid: uid of the resource; must not exist in RESOURCE yet.
         resource_type: stored in the ``type`` column.
         title: stored in the ``title`` column.
         created_on: stored in the ``created_on`` column.
         local_created_on: stored in the ``local_created_on`` column.
         path: stored in the ``path`` column.

     Returns:
         A Resource object populated with the given values.

     Raises:
         Exception: if a resource with this uid already exists, or if the
             insert or commit fails (the transaction is rolled back).
     """
     db = persistence.db()
     c = db.cursor()
     # check existence
     c.execute('SELECT * FROM RESOURCE WHERE uid=?', [uid])
     if c.fetchone() is not None:
         raise Exception(
             'Cannot add resource %s because it already exists' % uid)
     # create and fill object
     resource = Resource()
     resource.uid = uid
     resource.resource_type = resource_type
     resource.title = title
     resource.created_on = created_on
     resource.local_created_on = local_created_on
     resource.path = path
     # insert to DB
     try:
         c.execute(
             """INSERT INTO RESOURCE (
         uid, 
         type, 
         title, 
         created_on, 
         local_created_on,
         path) VALUES (?,?,?,?,?,?)""", [
                 uid, resource_type, title, created_on, local_created_on,
                 path
             ])
         db.commit()
     except Exception:
         # Record the underlying error before it is replaced below.
         logging.exception('Insert of resource %s failed', uid)
         # Undo the failed insert so it cannot leak into a later commit.
         db.rollback()
         raise Exception('Failed to insert resource %s into DB' % uid)
     except BaseException:
         # KeyboardInterrupt / SystemExit: roll back and propagate as-is.
         db.rollback()
         raise
     # return new object
     logging.info('Inserted new resource %s', uid)
     return resource
Exemplo n.º 9
0
import tarfile
import datetime
import json
import logging
import requests
from threading import Thread
from werkzeug import secure_filename
import logging.handlers
import fnmatch
from multiprocessing.dummy import Pool as ThreadPool

import persistence
import config
import model_registry

# Module-level database handle and cursor, obtained once when the module
# is imported.
db = persistence.db()
c = db.cursor()
# Thread-backed pool (multiprocessing.dummy.Pool); the number of workers
# comes from config.PARALLEL_JOBS.
_pool = ThreadPool(config.PARALLEL_JOBS)

sql = """CREATE TABLE IF NOT EXISTS CLASSIFIER (
      uid TEXT PRIMARY KEY NOT NULL,
      type TEXT,
      title TEXT,
      enabled INT,
      language TEXT,
      test_accuracy REAL,
      training_set_size INT,
      created_on TIMESTAMP,
      local_created_on TIMESTAMP,
      finished_on TIMESTAMP,
      state TEXT