Пример #1
0
    def delete_dataset(id):
        """Remove dataset *id*: drop its config section, then delete its data.

        Deletion of the underlying data is best-effort: a failure is only
        logged as a warning (the configuration entry is already gone by then).
        """
        dataset = DatasetManager.get_dataset(id)
        domain = 'datasets'
        ConfigurationManager.remove_section(domain, id)

        try:
            dataset.delete()
        except Exception:
            # BUG FIX: typo in the log message ('faile' -> 'failed')
            logger.warning(f'failed to delete dataset {id}')
Пример #2
0
    def list_jobs():
        """List all ML jobs found under the configured job directory.

        Returns a list of dicts with keys 'id', 'status', 'type' and 'name'.
        Jobs that cannot be read are skipped individually; a failure to scan
        the directory at all yields an empty list.
        """
        job_base_dir = ConfigurationManager.get_confs('mljob').get(
            'job', 'dir')

        try:
            # every sub-directory of the job base dir is one job, named by id
            job_ids = [
                entry for entry in os.listdir(job_base_dir)
                if os.path.isdir(os.path.join(job_base_dir, entry))
            ]
            results = []
            for job_id in job_ids:
                try:
                    logger.debug(f'find one job with id={job_id}')
                    status = MLJob.get_status_by_id(job_id)
                    meta = MLJob.get_meta(job_id)
                    results.append({
                        'id': job_id,
                        'status': status.name,
                        'type': meta['type'],
                        'name': meta['name'],
                    })
                except Exception:
                    # a single unreadable job must not break the listing
                    logger.exception(f'failed to retrieve job id={job_id}')
            return results
        except Exception:
            logger.exception('failed to list job')
            return []
Пример #3
0
 def getdbClient():
     """Build a MongoClient from the 'mongodb' section of the database confs.

     NOTE(review): if no 'mongodb' section exists, `host`/`port` are never
     assigned and the return raises UnboundLocalError — consider an explicit
     error; confirm the config always carries that section.
     """
     dbconf = ConfigurationManager.get_confs('database')
     for section in dbconf.sections():
         if section == 'mongodb':
             host = dbconf.get(section, 'host')
             # BUG FIX: np.int was removed in NumPy 1.24; it was only an
             # alias for the builtin int anyway
             port = int(dbconf.get(section, 'port'))
     return MongoClient(host, port)
Пример #4
0
 def getPoolConfig():
     """Return {'host', 'port'} for Redis from the database configuration.

     NOTE(review): if no 'redis' section exists, `host`/`port` are never
     assigned and the return raises UnboundLocalError — confirm the config
     always carries that section.
     """
     dbconf = ConfigurationManager.get_confs('database')
     for section in dbconf.sections():
         if section == 'redis':
             host = dbconf.get(section, 'host')
             # BUG FIX: np.int was removed in NumPy 1.24; use builtin int
             port = int(dbconf.get(section, 'port'))
     return {'host': host, 'port': port}
Пример #5
0
def drawWordCloud(request):
    """Generate a word-cloud image for predicted comments matching the filters.

    Expects a JSON body with: appname, name (connection), channel, sentiment,
    startTime, endTime and keywordColumn.  On success responds 200 with the
    generated file name ('<name>.jpeg'); responds 200 with an empty result
    when no records match, and 500 on any failure.
    """
    try:
        appname = request.json['appname']
        connection = request.json['name']
        channel = request.json['channel']
        sentiment = request.json['sentiment']
        # incoming dates use '/' separators; the query expects '-'
        startTime = request.json['startTime'].replace('/', '-')
        endTime = request.json['endTime'].replace('/', '-')
        keywordColumn = request.json['keywordColumn']
        result = DataSet().getConnectionPredictedByCondition(
            connection, startTime, endTime)
        if len(result) > 0:
            df = pd.DataFrame(result)

            # keep only rows that actually carry keywords
            df['keywords_counts'] = df[keywordColumn].apply(lambda x: len(x))
            df = df[df['keywords_counts'] > 0]
            comments_keyword = df[(df[channel] == appname)
                                  & (df['sentimental'] == sentiment)][keywordColumn]
            server_config = ConfigurationManager.get_confs('server')
            filepath = server_config.get('server', 'wordcloudPath')
            filename = 'wordcloud' + datetime.now().strftime('%Y%m%d%H%M%S')
            # frequency_wordcloud writes the image file as a side effect
            frequency_wordcloud(
                list(comments_keyword), filepath, filename, sentiment)
            filename_suffix = filename + '.jpeg'
            return response.json(
                {'message': 'succeeded to generate wordcloud',
                 'result': filename_suffix},
                status=200)
        else:
            # BUG FIX: 'result' was an unquoted name (the empty list itself)
            # used as a dict key, raising TypeError -> spurious 500 response
            return response.json(
                {'message': 'there is no record during the period',
                 'result': []},
                status=200)
    except Exception:
        # BUG FIX: logger.error(Exception) logged the class object and the
        # response was never returned to the caller
        logger.exception('failed to draw wordCloud')
        return response.json({'message': 'failed to draw wordCloud'},
                             status=500)
Пример #6
0
async def uploadModelFile(request):
    """Persist an uploaded model file under the configured 'filePath' dir.

    Responds 200 with the uploaded file names on success, 500 on failure.
    """
    server_config = ConfigurationManager.get_confs('server')
    filepath = server_config.get('server', 'filePath')
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    test_file = request.files.get('file')
    file_parameters = {
        'body': test_file.body,
        'name': test_file.name,
        'type': test_file.type,
    }

    try:
        # os.path.join tolerates a missing trailing separator in the config
        file_path = os.path.join(filepath, file_parameters['name'])
        # the with-statement closes the file; the redundant f.close() removed
        with open(file_path, 'wb') as f:
            f.write(file_parameters['body'])
        print('file wrote to disk')
        return response.json(
            {
                "message": 'Upload file successfully',
                # list(): dict_keys is not JSON-serializable with stdlib json
                "file_names": list(request.files.keys()),
                "success": True
            },
            status=200)
    except Exception as e:
        # BUG FIX: print(Exception) printed the class object, not the error
        print(e)
        return response.json(
            {
                "message": 'Upload file failed',
                "file_names": list(request.files.keys()),
                "success": False
            },
            status=500)
Пример #7
0
async def saveModel(request):
    """Register a model record built from the request's JSON body.

    'content' is optional and falls back to a generic description; the model
    path comes from the server configuration.  Responds 200 on success and
    500 on any failure.
    """
    try:
        if 'content' in request.json.keys():
            filename = request.json['content']
        else:
            filename = 'Service with specific functionality'
        server_config = ConfigurationManager.get_confs('server')
        path = server_config.get('server', 'filePath')
        department = request.json['department']
        team = request.json['team']
        version = request.json['version']
        description = request.json['description']
        features = request.json['features']
        name = request.json['name']
        # return value unused; addModel persists the record as a side effect
        ModelService.addModel(filename=filename,
                              path=path,
                              department=department,
                              team=team,
                              version=version,
                              description=description,
                              features=features,
                              isDeployed=False,
                              name=name)
        return response.json({'message': 'Add model successfully'}, status=200)
    except Exception:
        # BUG FIX: logger.error(Exception) logged the class object; log the
        # real traceback instead
        logger.exception('Add model failed')
        return response.json({'message': 'Add model failed'}, status=500)
Пример #8
0
    def _handle_job_option(self):
        """Fill self.job_option with auto-ml defaults from the 'mljob' confs.

        Caller-supplied values win; every default is still read from the
        config (as before), so a missing config key fails loudly.  tmp/output
        folders are always forced under the job directory.
        """
        job_config = ConfigurationManager.get_confs('mljob')
        # all auto_ml defaults are plain ints -- read them data-driven instead
        # of seven copy-pasted getint calls
        int_option_keys = (
            'time_left_for_this_task',
            'per_run_time_limit',
            'initial_configurations_via_metalearning',
            'ensemble_size',
            'ensemble_nbest',
            'ensemble_memory_limit',
            'ml_memory_limit',
        )
        default_job_config = {
            key: job_config.getint('auto_ml', key) for key in int_option_keys
        }

        for key, value in default_job_config.items():
            self.job_option.setdefault(key, value)

        self.job_option['tmp_folder'] = os.path.join(self.job_dir, 'tmp')
        self.job_option['output_folder'] = os.path.join(self.job_dir, 'output')
Пример #9
0
    def get_dataset(id):
        """Load dataset *id* from the 'datasets' configuration and return an
        instance of the matching dataset class."""
        config = ConfigurationManager.get_confs('datasets')
        fields = {
            key: config.get(id, key)
            for key in ('content', 'name', 'description', 'type')
        }
        dataset_class = get_dataset_class(fields['type'])
        return dataset_class(
            id, fields['name'], fields['content'], fields['description'])
Пример #10
0
    def _handle_validation_option(self):
        """Backfill self.validation_option with defaults read from the
        'validation_option' section of the 'mljob' configuration."""
        validation_config = ConfigurationManager.get_confs('mljob')
        default_validation = {
            'test_size': validation_config.getfloat(
                'validation_option', 'test_size'),
            'random_state': validation_config.getint(
                'validation_option', 'random_state'),
            'shuffle': validation_config.getboolean(
                'validation_option', 'shuffle'),
        }

        # caller-supplied options take precedence over the defaults
        for key, value in default_validation.items():
            self.validation_option.setdefault(key, value)
Пример #11
0
    def create_job(job_payload):
        """Create an ML job from *job_payload* and kick off its training.

        Training runs in a new process or thread depending on the
        'job.multi_processes' config flag; launch failures are logged but the
        job object is returned either way.

        NOTE(review): missing option keys are written back into *job_payload*
        (caller's dict is mutated) -- preserved for compatibility.

        Raises RuntimeError for an unsupported job type.
        """
        job_type = job_payload['type']
        job_option = {}
        job_option_attrs = [
            'name',
            'dataset',
            'features',
            'targets',
            'job_option',
            'validation_option',
        ]

        for key in job_option_attrs:
            if key not in job_payload:
                job_payload[key] = {}
            job_option[key] = job_payload[key]

        # dispatch table instead of an if/elif chain
        job_classes = {
            'AutoClassificationJob': AutoClassificationJob,
            'AutoRegressionJob': AutoRegressionJob,
            'TimeSerialsForecastsJob': TimeSerialsForecastsJob,
        }
        if job_type not in job_classes:
            raise RuntimeError(f'job type={job_type} not supported!')
        job = job_classes[job_type](**job_option)

        # BUG FIX: local renamed (is_multi_prorcess -> is_multi_process) and
        # the bare `except:` clauses below narrowed to `except Exception:` so
        # SystemExit/KeyboardInterrupt are no longer swallowed
        is_multi_process = ConfigurationManager.get_confs('mljob').getboolean(
            'job', 'multi_processes')
        if is_multi_process:
            # run train in a new process
            try:
                logger.debug(f'start new process to train ml job={job.id}')
                p = Process(target=job.train)
                p.start()
                # TODO: update training status using web sock
            except Exception:
                logger.exception(
                    f'failed to run ml job process for job={job.id}')
        else:
            try:
                logger.debug(f'start new thread to train ml job {job.id}')
                _thread.start_new_thread(job.train, ())
                # TODO: update training status using web sock
            except Exception:
                logger.exception(
                    f'failed to run ml job thread for job={job.id}')

        return job
Пример #12
0
# Build the global dataset-type registry from the 'dataset_type'
# configuration: each config section names one dataset type and points at
# the module/class implementing it.
from dataplay.confsvc.manager import ConfigurationManager
from dataplay.datasvc.registry import DatasetTypeRegistry


dataset_type_config = ConfigurationManager.get_confs('dataset_type')
dataset_registry = DatasetTypeRegistry()
for section in dataset_type_config.sections():
    module_name = dataset_type_config.get(section, 'module')
    class_name = dataset_type_config.get(section, 'class')
    # the section name doubles as the registered type key
    dataset_registry.register(section, class_name, module_name)
Пример #13
0
async def handle_request(request, file):
    """Serve file *file* from the configured word-cloud directory.

    NOTE(review): *file* comes straight from the route and is concatenated
    into a filesystem path -- confirm upstream routing prevents path
    traversal (e.g. '../').
    """
    conf = ConfigurationManager.get_confs('server')
    wordcloud_dir = conf.get('server', 'wordcloudPath')
    return await response.file(wordcloud_dir + file)
Пример #14
0
class MLJob(ABC):
    """Abstract base class for machine-learning jobs.

    Each job gets a uuid id and its own directory under ``base_dir`` where
    metadata (meta.json), a status file and the serialized model
    (model.joblib) are persisted.  Concrete job types implement
    :meth:`train` and :meth:`predict`.
    """

    # shared root for all job directories, read once at class-definition time
    base_dir = ConfigurationManager.get_confs('mljob').get('job', 'dir')

    def __init__(self, name, dataset):
        """Create a job named *name* over dataset id *dataset*.

        Loads the dataset into a dataframe and creates the job directory;
        raises RuntimeError if a directory for this id already exists.
        """
        self.id = str(uuid.uuid4())
        self.name = name
        self.dataset_id = dataset
        self.dataset = DatasetManager.get_dataset(dataset)
        self.df = self.dataset.get_df()
        self.job_dir = os.path.join(MLJob.base_dir, self.id)
        self.metadata = {}
        self._init()

    @abstractmethod
    def train(self):
        """Train the model; implemented by concrete job types."""
        return NotImplemented

    @abstractmethod
    def predict(self, df):
        """Predict on dataframe *df*; implemented by concrete job types."""
        return NotImplemented

    def _build_meta(self):
        """Populate self.metadata with the fields persisted to meta.json."""
        self.metadata['name'] = self.name
        self.metadata['dataset_id'] = self.dataset_id

    def _save_meta(self):
        """Write self.metadata to <job_dir>/meta.json under a file lock."""
        self._build_meta()
        meta_file = os.path.join(self.job_dir, 'meta.json')
        with FileLock(meta_file):
            with open(meta_file, 'w') as f:
                f.write(json.dumps(self.metadata))

    def _save_model(self):
        """Serialize this whole job object to <job_dir>/model.joblib."""
        logger.debug(
            f'save model for class={type(self).__name__} id={self.id} name={self.name}'
        )
        model_file = os.path.join(self.job_dir, 'model.joblib')
        dump(self, model_file)
        logger.debug('save model complete')

    @staticmethod
    def get_meta(id):
        """Load and return the metadata dict of job *id* (file-locked)."""
        meta_file = os.path.join(MLJob.base_dir, id, 'meta.json')
        with FileLock(meta_file):
            with open(meta_file) as f:
                return json.loads(f.read())

    @staticmethod
    def get_model(id):
        """Load and return the serialized job object of job *id*."""
        model_file = os.path.join(MLJob.base_dir, id, 'model.joblib')
        model = load(model_file)
        return model

    def _init(self):
        """Create the job directory and write initial status and metadata.

        Raises RuntimeError when the directory already exists; a failure to
        create it is only logged (best-effort, the job stays dir-less).
        """
        if os.path.isdir(self.job_dir):
            logger.error(f'job dir {self.job_dir} already exists')
            raise RuntimeError(f'job {self.id} already exists')

        try:
            os.makedirs(self.job_dir)
            self._update_status(MLJobStatus.INITIALIZED)
            self._save_meta()
        except OSError:
            logger.error(f'failed to create job dir {self.job_dir}')
        else:
            logger.debug(f'successfully created the directory {self.job_dir}')

    def _update_status(self, status):
        """Persist *status* (an MLJobStatus) to <job_dir>/status.

        Raises RuntimeError on any write failure.
        """
        try:
            status_file = os.path.join(self.job_dir, 'status')
            with FileLock(status_file):
                with open(status_file, 'w') as f:
                    f.write(str(status.value))
        except Exception:
            raise RuntimeError(f'failed to update status for ml job {self.id}')

    @staticmethod
    def get_status_by_id(id):
        """Read the status file of job *id* and return it as an MLJobStatus."""
        status_file = os.path.join(MLJob.base_dir, id, 'status')
        with FileLock(status_file):
            with open(status_file) as f:
                status_value = f.read()
                return MLJobStatus(int(status_value))

    def get_status(self):
        """Return this job's current MLJobStatus."""
        return MLJob.get_status_by_id(self.id)

    @staticmethod
    def delete_job_by_id(job_id):
        """Remove the whole directory of job *job_id*; errors only logged."""
        job_dir = os.path.join(MLJob.base_dir, job_id)
        try:
            shutil.rmtree(job_dir)
        except Exception:
            logger.exception(f'failed to delete job dir {job_dir}')
        else:
            logger.debug(f'successfully deleted the directory {job_dir}')

    def clean(self):
        """Delete this job's directory and all persisted artifacts."""
        MLJob.delete_job_by_id(self.id)
Пример #15
0
import os
from dataplay.confsvc.manager import ConfigurationManager


# dataset storage location is taken from the server configuration
server_config = ConfigurationManager.get_confs('server')
filepath = server_config.get('server', 'datasetPath')

# CSV_DATASET_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'dataset', 'csv')

# Root directory where CSV datasets are stored.
CSV_DATASET_PATH = filepath
# Supported query types for dataset access.
QUERY_TYPE_NORMAL = 'query'
QUERY_TYPE_SQL = 'sql'
QUERY_TYPES = [QUERY_TYPE_NORMAL, QUERY_TYPE_SQL]
Пример #16
0
        interface=RedisSessionInterface(expiry=600,
                                        sessioncookie=True,
                                        httponly=True))
# Add cors extension
CORS(app, automatic_options=True, supports_credentials=True)

# app.blueprint(openapi_blueprint)
app.blueprint(swagger_blueprint)

# OpenAPI / Swagger metadata
app.config.API_VERSION = '1.0.0'
app.config.API_TITLE = 'Dataplay API'
app.config.API_DESCRIPTION = 'Dataplay API'
app.config.API_CONTACT_EMAIL = '*****@*****.**'
app.config.API_PRODUCES_CONTENT_TYPES = ['application/json']

# server runtime settings come from the 'server' configuration
server_config = ConfigurationManager.get_confs('server')
app.config.HOST = server_config.get('server', 'host')
# NOTE(review): the port is hard-coded and assigned to lowercase `port`
# while the other settings use uppercase config keys read from the server
# config -- confirm this is intentional and actually picked up at runtime
app.config.port = 8888
app.config.DEBUG = server_config.getboolean('server', 'debug')
app.config.WORKERS = server_config.getint('server', 'workers')

# register every configured dataset type: each section maps a type name to
# the module/class implementing it
dataset_type_config = ConfigurationManager.get_confs('dataset_type')
dataset_registry = DatasetTypeRegistry()
for section in dataset_type_config.sections():
    module_name = dataset_type_config.get(section, 'module')
    class_name = dataset_type_config.get(section, 'class')
    dataset_registry.register(section, class_name, module_name)

# mount service blueprints (dataset/user under the API prefix)
app.blueprint(file_svc)
app.blueprint(dataset_svc, url_prefix=PREFIX)
app.blueprint(user_svc, url_prefix=PREFIX)