示例#1
0
def main(unused_argv):
    """
    Script entry point: run the model job named by the ``job_id`` flag.

    The job, its project and the project's owner are looked up, then
    ``run_model`` is called with the arguments stored in ``job.run_args``.
    When ``run_model`` raises, the formatted traceback is printed, emitted
    to the owner and saved on the job with status 300; the exception is
    not propagated. Otherwise a completion message is emitted.

    :param unused_argv: ignored
    :return: None
    :raises ValueError: if the job_id flag equals the placeholder id
    """
    job_id = FLAGS.job_id
    # NOTE(review): this literal is presumably the flag's default value,
    # i.e. "no job id supplied" - confirm against the flag definition
    if job_id == "59ae047e0c11f35fafebc422":
        raise ValueError('no job_id flag')

    job = job_business.get_by_job_id(job_id)
    project_id = job.project.id
    project = project_business.get_by_id(project_id)
    owner = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = owner.user.user_ID
    run_args = job.run_args

    try:
        run_model(run_args['conf'],
                  run_args['project_id'],
                  run_args['data_source_id'],
                  run_args['model_id'],
                  job_id,
                  **run_args['kwargs'])
    except Exception:
        # report the failure to the user and persist it on the job;
        # the error is deliberately not re-raised here
        failure = {
            'error': repr(traceback.format_exception(*sys.exc_info())),
            'type': 'model'
        }
        print(failure)
        emit_error(failure, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job, error=failure, status=300)
        return

    success = {
        'project_name': project.name,
        'type': 'model',
        'complete': True,
        'content': 'Model job completed in project ' + project.name
    }
    emit_success(success, str(project_id), job_id=job_id, user_ID=user_ID)
示例#2
0
def custom_model(conf, model_fn, input_data, **kw):
    """
    Unpack the run configuration and delegate to ``custom_model_help``.

    :param conf: dict; its 'estimator', 'fit' and 'evaluate' entries are
        read ('fit' and 'evaluate' default to an empty dict)
    :param model_fn: forwarded unchanged to ``custom_model_help``
    :param input_data: forwarded unchanged to ``custom_model_help``
    :param kw: 'project_id', 'job_id', 'result_sds', 'result_dir' and
        'logging' (default True) are popped from here
    :return: the value returned by ``custom_model_help``
    :raises RuntimeError: if no 'result_sds' was passed
    """
    job_id = kw.pop('job_id', None)
    project_id = kw.pop('project_id', None)
    result_dir = kw.pop('result_dir', None)
    result_sds = kw.pop('result_sds', None)

    # resolve the user ID of the project's owner
    owner = ownership_business.get_ownership_by_owned_item(
        project_business.get_by_id(project_id), 'project')
    user_ID = owner.user.user_ID

    est_params = conf.get('estimator')
    fit_params = conf.get('fit', {})
    eval_params = conf.get('evaluate', {})

    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')

    return custom_model_help(model_fn, input_data, project_id, job_id,
                             user_ID, result_dir, result_sds, est_params,
                             fit_params, eval_params,
                             kw.pop('logging', True))
示例#3
0
def check_private(owned, owned_type):
    """
    Tell whether the ownership record of an object is marked private.

    :param owned: the owned object
    :param owned_type: type name passed to the ownership lookup
    :return: True only when the record's ``private`` attribute is exactly
        True, otherwise False
    """
    record = ownership_business.get_ownership_by_owned_item(owned,
                                                            owned_type)
    return record.private is True
def remove_user_request_by_id(user_request_id, user_ID):
    """
    Remove a user request on behalf of its owner.

    :param user_request_id: id of the user request to remove
    :param user_ID: user ID of the caller; must match the request's owner
    :return: the value returned by ``user_request_business.remove_by_id``
    :raises ValueError: if the request is owned by a different user
    """
    target = user_request_business.get_by_user_request_id(user_request_id)
    owner_record = ownership_business.get_ownership_by_owned_item(
        target, 'user_request')
    # only the owner may delete the request
    if user_ID != owner_record.user.user_ID:
        raise ValueError('this request not belong to this user, cannot delete')
    return user_request_business.remove_by_id(user_request_id)
示例#5
0
def run_job():
    """
    Request handler: run the toolkit or model job named in the JSON body.

    The request JSON must contain 'section_id' (used as the job id) and
    'project_id'. Depending on whether the job has a toolkit or a model,
    the matching ``job_service`` runner is called and its result converted
    with ``json_utility.convert_to_json``.

    On failure the formatted traceback is printed, emitted to the project
    owner, saved on the job with status 300, and the exception is
    re-raised. On success a message is emitted to the owner.

    :return: ``(response, 200)`` with the job result, or ``(response, 400)``
        when the job has neither a toolkit nor a model
    """
    payload = request.get_json()
    job_id = payload['section_id']
    project_id = payload["project_id"]

    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    owner = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = owner.user.user_ID

    # stays None when the failure happens before the job kind is known
    job_type = None
    try:
        if job_obj.toolkit:
            job_type, complete = 'toolkit', True
            content = 'Toolkit job completed in project ' + project.name
            result = job_service.run_toolkit_job(project_id=project_id,
                                                 job_obj=job_obj)
        elif job_obj.model:
            job_type, complete = 'model', False
            content = ('Model job successfully created in project ' +
                       project.name)
            result = job_service.run_model_job(project_id=project_id,
                                               job_obj=job_obj)
        else:
            # returning from inside the try skips the success message below
            return jsonify({"response":
                            'no model and toolkit in job object'}), 400
        result = json_utility.convert_to_json(result)
    except Exception as err:
        # report the failure, persist it on the job, then propagate it
        failure = {
            'error': repr(traceback.format_exception(*sys.exc_info())),
            'type': job_type
        }
        print(failure)
        emit_error(failure, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job_obj, error=failure, status=300)
        raise err
    else:
        success = {
            'project_name': project.name,
            'type': job_type,
            'complete': complete,
            'content': content
        }
        emit_success(success, str(project_id), job_id=job_id, user_ID=user_ID)
        return jsonify({"response": {"result": result}}), 200
def update_request_answer(request_answer_id, user_id, answer):
    """
    Replace the text of a request answer on behalf of its owner.

    :param request_answer_id: id of the request answer to update
    :param user_id: user ID of the caller; must match the answer's owner
    :param answer: new answer value
    :return: None
    :raises RuntimeError: if the answer is owned by a different user
    """
    target = request_answer_business.get_by_request_answer_id(
        request_answer_id)
    owner_record = ownership_business.get_ownership_by_owned_item(
        target, 'request_answer')
    # only the owner may update the answer
    if owner_record.user.user_ID != user_id:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    request_answer_business.update_request_answer_by_id(
        request_answer_id=request_answer_id, answer=answer)
示例#7
0
def update_user_request_comments(user_request_comments_id, user_ID, comments):
    """
    Replace the text of a user-request comment on behalf of its owner.

    :param user_request_comments_id: id of the comment to update
    :param user_ID: user ID of the caller; must match the comment's owner
    :param comments: new comment value
    :return: None
    :raises RuntimeError: if the comment is owned by a different user
    """
    target = comments_business.get_by_user_request_comments_id(
        user_request_comments_id)
    owner_record = ownership_business.get_ownership_by_owned_item(
        target, 'user_request_comments')
    # only the owner may update the comment
    if owner_record.user.user_ID != user_ID:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    comments_business.update_user_request_comments_by_id(
        user_request_comments_id=user_request_comments_id,
        comments=comments)
def accept_request_answer(user_request_id, user_ID, request_answer_id):
    """
    Record an answer as the accepted one for a user request.

    :param user_request_id: id of the user request to update
    :param user_ID: user ID of the caller; must match the request's owner
    :param request_answer_id: id of the accepted answer; wrapped in
        ``ObjectId`` before it is stored
    :return: None
    :raises RuntimeError: if the request is owned by a different user
    """
    target = user_request_business.get_by_user_request_id(user_request_id)
    owner_record = ownership_business.get_ownership_by_owned_item(
        target, 'user_request')
    # only the owner of the request may accept an answer
    if owner_record.user.user_ID != user_ID:
        raise RuntimeError(
            'this request not belong to this user, cannot update')
    user_request_business.update_user_request_by_id(
        user_request_id=user_request_id,
        accept_answer=ObjectId(request_answer_id))
示例#9
0
def to_code():
    """
    Request handler: return the code generated for a model job.

    The request JSON must contain 'section_id' (used as the job id) and
    'project_id'.

    :return: ``(response, 200)`` whose body carries the output of
        ``job_service.model_job_to_code`` under response.code
    """
    payload = request.get_json()
    job_id = payload['section_id']
    project_id = payload["project_id"]

    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    owner = ownership_business.get_ownership_by_owned_item(project, 'project')
    # the owner's user ID is resolved but not used further in this handler
    user_ID = owner.user.user_ID

    generated = job_service.model_job_to_code(project_id=project_id,
                                              job_obj=job_obj)
    return jsonify({"response": {"code": generated}}), 200
示例#10
0
def get_results_dir_by_job_id(job_id, user_ID, checkpoint='final'):
    """
    Locate the result directory and weight file name of a job.

    The directory is ``user_directory/<owner user ID>/<project name>/<job
    id>``. It is always built from the project owner's user ID, not the
    caller's, and uses the same ``os.path.join`` composition as
    ``run_model`` so both agree on the location whether or not
    ``user_directory`` ends with a path separator.

    :param job_id: id of the job whose results are requested
    :param user_ID: user ID of the caller; only checked when the project
        is private
    :param checkpoint: checkpoint name, used as the stem of the file name
    :return: tuple ``(result_dir, filename)`` where filename is
        ``'<checkpoint>.hdf5'``
    :raises ValueError: if the project is private and the caller is not
        its owner
    """
    project = job_business.get_by_job_id(job_id).project
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    owner_ID = ownership.user.user_ID
    if ownership.private and owner_ID != user_ID:
        raise ValueError('Authentication failed')
    # join the components instead of concatenating strings, so a missing
    # trailing separator on user_directory cannot corrupt the path
    result_dir = os.path.join(user_directory, owner_ID, project.name, job_id)
    filename = '{}.hdf5'.format(checkpoint)
    return result_dir, filename
示例#11
0
def remove_project_by_id(project_id, user_ID):
    """
    Remove a project on behalf of its owner.

    Besides the project object, this calls ``delete_hub_user`` for the
    user/project pair and deletes the project's directory under
    ``UPLOAD_FOLDER`` when it exists.

    :param project_id: object_id of the project to remove
    :param user_ID: user ID of the caller; must match the project's owner
    :return: the value returned by ``project_business.remove_by_id``
    :raises ValueError: if the project is owned by a different user
    """
    project = project_business.get_by_id(project_id)
    owner_record = ownership_business.get_ownership_by_owned_item(
        project, 'project')
    # only the owner may delete the project
    if user_ID != owner_record.user.user_ID:
        raise ValueError('project not belong to this user, cannot delete')

    # drop the temporary jupyterhub user tied to this project
    delete_hub_user(user_ID, project.name)

    # remove the project's files, if any were created
    project_path = ''.join((UPLOAD_FOLDER, user_ID, '/', project.name))
    if os.path.isdir(project_path):
        shutil.rmtree(project_path)

    # finally remove the project object itself
    return project_business.remove_by_id(project_id)
示例#12
0
def mlp(conf, input, **kw):
    """
    Unpack configuration and data, then run ``mlp_main`` inside ``graph``.

    :param conf: dict with 'fit' and 'evaluate' entries
    :param input: dict with 'x_tr', 'y_tr', 'x_te' and 'y_te' entries
    :param kw: 'result_sds', 'project_id', 'result_dir' and 'job_id' are
        popped from here (each defaults to None)
    :return: the value returned by ``mlp_main``
    """
    job_id = kw.pop('job_id', None)
    project_id = kw.pop('project_id', None)
    result_dir = kw.pop('result_dir', None)
    result_sds = kw.pop('result_sds', None)

    # resolve the user ID of the project's owner
    owner = ownership_business.get_ownership_by_owned_item(
        project_business.get_by_id(project_id), 'project')
    user_ID = owner.user.user_ID

    fit_conf = conf['fit']
    eval_conf = conf['evaluate']

    x_train, y_train = input['x_tr'], input['y_tr']
    x_test, y_test = input['x_te'], input['y_te']
    # the 'x_te'/'y_te' split is used for validation as well as testing
    x_val, y_val = x_test, y_test

    with graph.as_default():
        return mlp_main(result_sds, project_id, job_id, user_ID, result_dir,
                        x_train, y_train, x_val, y_val, x_test, y_test,
                        fit_conf, eval_conf)
示例#13
0
def update_project(project_id, name, description, is_private=True,
                   related_fields=None, tags=None, related_tasks=None,
                   done_indices=None):
    """
    Update an existing project and the privacy flag of its ownership.

    The project's ``update_time`` is set to the current UTC time.

    :param project_id: id of the project to update
    :param name: str
    :param description: str
    :param is_private: boolean, stored on the project's ownership record
    :param related_fields: list; an empty list is stored when omitted
    :param tags: list; an empty list is stored when omitted
    :param related_tasks: list; an empty list is stored when omitted
    :param done_indices: list; an empty list is stored when omitted
    :return: None
    """
    # None replaces the former mutable ``[]`` defaults, which were shared
    # between calls; a fresh list is created for every omitted argument
    related_fields = [] if related_fields is None else related_fields
    tags = [] if tags is None else tags
    related_tasks = [] if related_tasks is None else related_tasks
    done_indices = [] if done_indices is None else done_indices

    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    ownership_business.update_by_id(ow['id'], private=is_private)
    project_business.update_by_id(project_id, name=name,
                                  description=description,
                                  update_time=datetime.utcnow(),
                                  related_fields=related_fields,
                                  tags=tags, related_tasks=related_tasks,
                                  done_indices=done_indices)
示例#14
0
def fork(project_id, new_user_ID):
    """
    Fork a public project for another user.

    The project is copied and an ownership record is added for the new
    user, the project's staging data sets are copied to the fork, and
    every job is copied and collected on the fork.

    :param project_id: id of the project to fork
    :param new_user_ID: user ID of the user who receives the fork
    :return: the reloaded copy of the project
    :raises NameError: if the project is private, or if it already
        belongs to ``new_user_ID``
    """
    source = project_business.get_by_id(project_id)

    # forking is refused for private projects and for one's own project
    source_ownership = ownership_business.get_ownership_by_owned_item(
        source, 'project')
    if source_ownership.private is True:
        raise NameError('forked project is private, fork failed')
    if source_ownership.user.user_ID == new_user_ID:
        raise NameError('you are forking your self project')

    new_owner = UserBusiness.get_by_user_ID(new_user_ID)

    # copy the project and register the new user's ownership of the copy
    project_cp = project_business.copy(source)
    ownership_business.add(new_owner, True, project=project_cp)

    # copy the staging data sets of the source project
    for sds in staging_data_set_business.get_by_project_id(project_id, False):
        staging_data_service.copy_staging_data_set(sds, project_cp)

    # copy the jobs one by one
    copied_jobs = []
    for job in source.jobs:
        # source data set of the job, looked up by name and project
        source_sds = getattr(job, 'staging_data_set', None)
        if source_sds:
            sds_cp = staging_data_set_business.get_by_name_and_project(
                source_sds.name, source_sds.project)
        else:
            sds_cp = None

        job_cp = job_business.copy_job(job, project_cp, sds_cp)
        if not job_cp:
            # nothing was copied for this job, skip it
            continue
        copied_jobs.append(job_cp)

        # bind the copied job to the job's result data set, when one exists
        try:
            result_sds = staging_data_set_business.get_by_job_id(job['id'])
            staging_data_set_business.update_job_by_name_and_project(
                result_sds.name, result_sds.project, job_cp)
        except DoesNotExist:
            pass

    project_business.update_by_id(project_cp['id'], jobs=copied_jobs)
    project_cp.reload()
    return project_cp
示例#15
0
def unpublish_project(project_id):
    """
    Mark the ownership record of a project as private.

    :param project_id: id of the project to unpublish
    :return: the value returned by ``ownership_business.update_by_id``
    """
    ownership = ownership_business.get_ownership_by_owned_item(
        project_business.get_by_id(project_id), 'project')
    return ownership_business.update_by_id(ownership['id'], private=True)
示例#16
0
def get_by_id(project_id):
    """
    Fetch a project and annotate it with its privacy flag.

    :param project_id: id of the project to fetch
    :return: the project object, with ``is_private`` set from its
        ownership record
    """
    found = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(found,
                                                               'project')
    found.is_private = ownership.private
    return found
示例#17
0
def run_model(conf, project_id, data_source_id, model_id, job_id, **kwargs):
    """
    Run a model by model_id and the parameter config.

    The model's category selects how the input is prepared and which
    function is handed to ``job_service.run_code``:

    * neural_network, unstructured, hyperopt: the model's entry function
      is run directly;
    * custom_supervised, unsupervised: ``models.custom_model`` is run and
      receives the entry function as the model function.

    Results are written under
    ``user_directory/<owner user ID>/<project name>/<job id>``.

    :param conf: dict of run parameters; the 'fit' entry supplies
        'data_fields' for every category except neural_network and
        unstructured, and may be absent
    :param project_id: id of the project the job belongs to
    :param data_source_id: id of the input data (passed as ``file_id``
        for unstructured models)
    :param model_id: id of the model to run
    :param job_id: id of the job
    :param kwargs: forwarded to the input manager of the category
    :return: the value returned by ``job_service.run_code``, or None when
        the category matches none of the above
    """
    model = model_business.get_by_model_id(model_id)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    result_dir = os.path.join(user_directory, ownership.user.user_ID,
                              project.name, job_id)

    category = model['category']
    # every category needs the entry function, so resolve it once
    entry_fn = getattr(models, model.entry_function)

    if category == ModelType['neural_network']:
        # keras nn
        input_dict = manage_nn_input(conf, data_source_id, **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, entry_fn, job_id, input_dict,
                                    result_dir=result_dir)

    if category == ModelType['unstructured']:
        # input from folder
        input_dict = model_input_manager_unstructured(conf, data_source_id,
                                                      **kwargs)
        return job_service.run_code(conf, project_id, None,
                                    model, entry_fn, job_id, input_dict,
                                    file_id=data_source_id,
                                    result_dir=result_dir)

    # a missing (or None) 'fit' section falls back to the data_fields
    # defaults below instead of failing on ``None.get``
    fit = conf.get('fit') or {}

    if category == ModelType['hyperopt']:
        data_fields = fit.get('data_fields', [[], []])
        input_dict = model_input_manager_custom_supervised(data_fields,
                                                           data_source_id,
                                                           model.name,
                                                           **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, entry_fn, job_id, input_dict,
                                    result_dir=result_dir)

    # custom models: custom_model wraps the entry function
    if category == ModelType['custom_supervised']:
        data_fields = fit.get('data_fields', [[], []])
        input_dict = model_input_manager_custom_supervised(data_fields,
                                                           data_source_id,
                                                           model.name,
                                                           **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, models.custom_model, job_id,
                                    entry_fn, input_dict,
                                    result_dir=result_dir)

    if category == ModelType['unsupervised']:
        x_cols = fit.get('data_fields', [])
        input_dict = model_input_manager_unsupervised(x_cols,
                                                      data_source_id,
                                                      model.name,
                                                      **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, models.custom_model, job_id,
                                    entry_fn, input_dict,
                                    result_dir=result_dir)

    # unknown category: nothing is run
    return None
示例#18
0
文件: keras_seq.py 项目: zjn0224/mo
def keras_seq(conf, input, **kw):
    """
    A general implementation of a keras sequential model.

    Builds a ``Sequential`` model from ``conf['layers']``, compiles it,
    fits it with logging and checkpoint callbacks, evaluates it, logs the
    outcome through ``logger_service`` and saves the model to the result
    directory.

    :param conf: config dict with entries
        'layers' (list of dicts with 'name' and 'args'),
        'compile', 'fit' and 'evaluate' (each a dict with 'args');
        ``conf['fit']['args']['epochs']`` is required
    :param input: dict with 'x_tr', 'y_tr', 'x_te' and 'y_te' entries
    :param kw: 'result_sds', 'project_id', 'job_id' and 'result_dir' are
        popped from here
    :return: dict with 'score' (the evaluation result) and 'history'
        (the fit history)
    :raises RuntimeError: if 'result_sds' or 'project_id' is missing
    """
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    result_dir = kw.pop('result_dir', None)

    # validate before project_id is used for the lookup below; otherwise
    # the missing-id case would fail inside the lookup instead
    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    if project_id is None:
        raise RuntimeError('no project id passed to model')

    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    print('conf')
    print(conf)

    with graph.as_default():
        model = Sequential()

        ls = conf['layers']
        comp = conf['compile']
        f = conf['fit']
        e = conf['evaluate']
        x_train = input['x_tr']
        y_train = input['y_tr']
        # the 'x_te'/'y_te' split is used for validation as well as testing
        x_val = input['x_te']
        y_val = input['y_te']
        x_test = input['x_te']
        y_test = input['y_te']

        training_logger = logger_service.TrainingLogger(f['args']['epochs'],
                                                        project_id,
                                                        job_id,
                                                        user_ID,
                                                        result_sds)

        # TODO add validator

        # add the layers in order, resolving each class by name from keras
        for layer_conf in ls:
            layer_class = getattr(layers, layer_conf['name'])
            model.add(layer_class(**layer_conf['args']))

        # compile (optimizer, loss and metrics all come from the config)
        model.compile(**comp['args'])

        # callback to save metrics
        batch_print_callback = LambdaCallback(
            on_epoch_begin=training_logger.log_epoch_begin,
            on_epoch_end=training_logger.log_epoch_end,
            on_batch_end=training_logger.log_batch_end)

        # checkpoint to save best weight
        best_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'best.hdf5')),
            save_weights_only=True,
            verbose=1, save_best_only=True)
        # checkpoint to save latest weight
        general_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'latest.hdf5')),
            save_weights_only=True,
            verbose=1)

        # training
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            callbacks=[batch_print_callback, best_checkpoint,
                                       general_checkpoint],
                            verbose=0,
                            **f['args'])

        # testing
        score = model.evaluate(x_test, y_test, **e['args'])
        config = model.get_config()
        logger_service.log_train_end(result_sds,
                                     model_config=config,
                                     score=score,
                                     history=history.history)
        keras_saved_model.save_model(result_dir, model)
        return {'score': score, 'history': history.history}