def custom_model(conf, model_fn, input_data, **kw):
    """
    :param conf: config dict with optional 'estimator', 'fit' and
                 'evaluate' sections
    :param model_fn: model function of the custom model
    :param input_data: input data for the model
    :param kw: extra keyword args (result_sds, result_dir, project_id,
               job_id, logging)
    :return: result of custom_model_help
    """
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    result_sds = kw.pop('result_sds', None)
    result_dir = kw.pop('result_dir', None)
    est_params = conf.get('estimator', None)
    fit_params = conf.get('fit', {})
    eval_params = conf.get('evaluate', {})
    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    logging_flag = kw.pop('logging', True)
    return custom_model_help(model_fn, input_data, project_id, job_id,
                             user_ID, result_dir, result_sds, est_params,
                             fit_params, eval_params, logging_flag)
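# Illustration only (not part of the service code): a minimal sketch of the
# `conf` shape custom_model reads. The section names 'estimator', 'fit' and
# 'evaluate' come from the .get() calls above; every nested value below is a
# hypothetical placeholder, not a documented schema.
example_custom_model_conf = {
    'estimator': {'args': {'hidden_units': [32, 16]}},  # hypothetical args
    'fit': {'args': {'steps': 1000}},                   # hypothetical args
    'evaluate': {'args': {'steps': 1}},                 # hypothetical args
}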
def main(unused_argv):
    job_id = FLAGS.job_id
    # the hard-coded ObjectId is presumably the flag's default value,
    # i.e. no job_id was actually passed on the command line
    if job_id == "59ae047e0c11f35fafebc422":
        raise ValueError('no job_id flag')
    job = job_business.get_by_job_id(job_id)
    # project id
    project_id = job.project.id
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    args = job.run_args
    try:
        run_model(args['conf'], args['project_id'], args['data_source_id'],
                  args['model_id'], job_id, **args['kwargs'])
    except Exception:
        # if error: send error, save error and keep the job failed
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': 'model'
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job, error=message, status=300)
    else:
        message = {
            'project_name': project.name,
            'type': 'model',
            'complete': True,
            'content': 'Model job completed in project ' + project.name
        }
        emit_success(message, str(project_id), job_id=job_id,
                     user_ID=user_ID)
def run_job():
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]
    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    # avoid shadowing the builtin `type`
    job_type = None
    try:
        if job_obj.toolkit:
            job_type = 'toolkit'
            complete = True
            content = 'Toolkit job completed in project ' + project.name
            result = job_service.run_toolkit_job(project_id=project_id,
                                                 job_obj=job_obj)
        elif job_obj.model:
            job_type = 'model'
            complete = False
            content = 'Model job successfully created in project ' + \
                      project.name
            result = job_service.run_model_job(project_id=project_id,
                                               job_obj=job_obj)
        else:
            return jsonify(
                {"response": 'no model and toolkit in job object'}), 400
        result = json_utility.convert_to_json(result)
    except Exception as e:
        # if error: send error, save error and re-raise
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': job_type
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job_obj, error=message, status=300)
        raise e
    else:
        message = {
            'project_name': project.name,
            'type': job_type,
            'complete': complete,
            'content': content
        }
        emit_success(message, str(project_id), job_id=job_id,
                     user_ID=user_ID)
        return jsonify({"response": {"result": result}}), 200
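# Illustration only: the JSON body run_job expects. The two keys are exactly
# what request.get_json() is queried for above; the id values are
# placeholders.
#
# POST body:
# {
#     "section_id": "<job ObjectId string>",
#     "project_id": "<project ObjectId string>"
# }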
def wrapper(*args, **kw):
    # `func`, `project_id`, `job_id`, `model_obj` and `kwargs` come from
    # the enclosing decorator's scope
    result_dir = kwargs.get('result_dir')
    project_obj = project_business.get_by_id(project_id)
    job_obj = job_business.get_by_job_id(job_id)
    # update the project
    project_business.insert_job_by_id(project_id, job_obj.id)
    project_business.update_items_to_list_field(
        project_id,
        related_tasks=TYPE.get(model_obj.category, []))
    # create result sds for the model; remove any stale one first
    sds_name = '%s_%s_result' % (model_obj['name'], job_obj['id'])
    try:
        sds = staging_data_set_business.get_by_job_id(job_obj.id)
    except DoesNotExist:
        print('free to create sds')
    else:
        staging_data_set_business.remove_by_id(sds.id)
    finally:
        result_sds_obj = staging_data_set_business.add(sds_name, 'des',
                                                       project_obj,
                                                       job=job_obj,
                                                       type='result')
    # run
    if result_dir:
        try:
            os.makedirs(result_dir)
        except FileExistsError:
            print('dir exists, no need to create')
        kw['result_dir'] = result_dir
    func_result = func(*args, **kw, result_sds=result_sds_obj,
                       project_id=project_id, job_id=job_id)
    # update the job
    job_business.end_job(job_obj)
    if isinstance(func_result, dict):
        func_result['job_id'] = str(job_obj['id'])
    return func_result
def to_code():
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]
    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    code = job_service.model_job_to_code(project_id=project_id,
                                         job_obj=job_obj)
    return jsonify({"response": {"code": code}}), 200
def mlp(conf, input, **kw):
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    result_dir = kw.pop('result_dir', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    f = conf['fit']
    e = conf['evaluate']
    x_train = input['x_tr']
    y_train = input['y_tr']
    # validation and test sets are both taken from the 'x_te'/'y_te' split
    x_val = input['x_te']
    y_val = input['y_te']
    x_test = input['x_te']
    y_test = input['y_te']
    with graph.as_default():
        return mlp_main(result_sds, project_id, job_id, user_ID, result_dir,
                        x_train, y_train, x_val, y_val, x_test, y_test, f, e)
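# Illustration only: the shape of the `input` dict mlp consumes. The keys
# 'x_tr'/'y_tr'/'x_te'/'y_te' come from the lookups above; the toy arrays
# are hypothetical.
example_mlp_input = {
    'x_tr': [[0.1, 0.2], [0.3, 0.4]],  # training features
    'y_tr': [0, 1],                    # training labels
    'x_te': [[0.5, 0.6]],              # held-out features (val and test)
    'y_te': [1],                       # held-out labels
}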
def remove_project_by_id(project_id, user_ID):
    """
    remove project by its object_id
    :param project_id: object_id of the project to remove
    :param user_ID: user_ID of the project owner
    :return:
    """
    project = project_business.get_by_id(project_id)
    # check ownership
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    if user_ID != ownership.user.user_ID:
        raise ValueError('project does not belong to this user, '
                         'cannot delete')
    # delete tmp jupyterhub user
    delete_hub_user(user_ID, project.name)
    # delete project directory
    project_directory = UPLOAD_FOLDER + user_ID + '/' + project.name
    if os.path.isdir(project_directory):
        shutil.rmtree(project_directory)
    # delete project object
    return project_business.remove_by_id(project_id)
def update_project(project_id, name, description, is_private=True,
                   related_fields=[], tags=[], related_tasks=[],
                   done_indices=[]):
    """
    Update an existing project
    :param project_id: ObjectId
    :param name: str
    :param description: str
    :param is_private: boolean
    :return: None
    """
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    ownership_business.update_by_id(ow['id'], private=is_private)
    project_business.update_by_id(project_id, name=name,
                                  description=description,
                                  update_time=datetime.utcnow(),
                                  related_fields=related_fields,
                                  tags=tags,
                                  related_tasks=related_tasks,
                                  done_indices=done_indices)
def get_by_id(project_id):
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    project.is_private = ow.private
    return project
def kube_run_model(conf, project_id, data_source_id, model_id, job_obj,
                   **kwargs):
    staging_data_set_obj = None
    if data_source_id:
        staging_data_set_obj = \
            staging_data_set_business.get_by_id(data_source_id)
    project_obj = project_business.get_by_id(project_id)
    model_obj = model_business.get_by_model_id(model_id)
    run_args = {
        "conf": conf,
        "project_id": project_id,
        "data_source_id": data_source_id,
        "model_id": model_id,
        "kwargs": kwargs
    }
    job_obj = job_business.update_job_by_id(
        job_obj.id, model=model_obj, staging_data_set=staging_data_set_obj,
        project=project_obj, params=conf, run_args=run_args, status=100)
    job_id = str(job_obj.id)
    print(job_id)
    # NOTE: this early return runs the model in-process and makes the
    # Kubernetes job path below unreachable
    return run_model(conf, project_id, data_source_id, model_id, job_id,
                     **kwargs)
    job_name = job_id + '-training-job'
    client = kube_service.client
    try:
        # TODO need to terminate running pod
        # delete any previous job with the same name, then poll until
        # get_job raises, i.e. the old job is really gone
        kube_service.delete_job(job_name)
        while True:
            kube_service.get_job(job_name)
            time.sleep(1)
    except client.rest.ApiException:
        print('job not exists or deleted, ok to create')
    kube_json = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": job_name
        },
        "spec": {
            "template": {
                "metadata": {
                    "labels": {
                        "app": job_id
                    }
                },
                "spec": {
                    "containers": [
                        {
                            "name": job_id,
                            "image": "10.52.14.192/gzyw/model_app_pre",
                            "imagePullPolicy": "IfNotPresent",
                            "securityContext": {
                                "privileged": True,
                            },
                            "stdin": True,
                            "command": ["/usr/local/bin/python"],
                            "args": ["run_model.py", "--job_id", job_id],
                            "volumeMounts": [
                                {
                                    "mountPath": "/pyserver/user_directory",
                                    "name": "nfsvol"
                                },
                            ]
                        }
                    ],
                    "restartPolicy": "Never",
                    "volumes": [
                        {
                            "name": "nfsvol",
                            "persistentVolumeClaim": {
                                "claimName": "nfs-pvc"
                            }
                        },
                    ]
                },
            },
        }
    }
    api = kube_service.job_api
    resp = api.create_namespaced_job(body=kube_json, namespace=NAMESPACE)
    print("Job created. status='%s'" % str(resp.status))
    return {'job_id': job_id}
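# Illustration only: per the manifest above, each training-job pod simply
# executes the run_model.py entry point with the job id, e.g.
#   /usr/local/bin/python run_model.py --job_id <job ObjectId string>
# main() in run_model.py then reads FLAGS.job_id, loads the job's run_args
# and calls run_model().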
def unpublish_project(project_id):
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    return ownership_business.update_by_id(ow['id'], private=True)
def wrapper(*args, **kw):
    # create a job
    staging_data_set_obj = staging_data_set_business.get_by_id(
        staging_data_set_id)
    project_obj = project_business.get_by_id(project_id)
    job_spec = {
        "fields": {
            "source": fields[0],
            "target": fields[1]
        },
        "params": kw
    }
    job_obj = job_business.add_toolkit_job(toolkit_obj, staging_data_set_obj,
                                           project_obj, **job_spec)
    # update the project
    project_business.insert_job_by_id(project_id, job_obj.id)
    # calculate
    func_rst = func(*args, **kw)
    result = list(func_rst) if isinstance(func_rst, tuple) else [func_rst]

    # newly designed result storage format
    results = {"fields": {"source": fields[0], "target": fields[1]}}
    gen_info = []
    result_spec = toolkit_obj.result_spec
    for arg in result_spec["args"]:
        value = result.pop(0)
        results.update({arg["name"]: value})
        if arg["if_add_column"]:
            # Chinese names cannot be used as column names
            str_name = "%s_col" % toolkit_obj.entry_function
            value = data_utility.retrieve_nan_index(value, nan_index)
            try:
                staging_data_service.update_many_with_new_fields(
                    value, nan_index, fields[0], str_name,
                    staging_data_set_id)
            except (TypeError, ValueError) as e:
                print("ERRORS in data saved to database")
        # `labels` is set by the result arg whose attribute is 'label'
        if arg.get("attribute", False) and arg["attribute"] == "label":
            labels = value
        elif arg.get("attribute", False) and \
                arg["attribute"] == "general_info":
            gen_info.append({
                arg["name"]: {
                    "value": value,
                    "description": arg["des"]
                }
            })

    # visualization
    # clustering
    if toolkit_obj.category == 0:
        json = {
            "scatter": data_utility.retrieve_nan_index(args[0], nan_index),
            "labels": labels,
            "pie": [{'name': el, 'value': labels.count(el)}
                    for el in set(labels)],
            "centers": results["Centroids of Clusters"],
            "general_info": gen_info,
            "fields": fields[0],
            "category": toolkit_obj.category
        }
    # feature selection
    elif toolkit_obj.category == 1:
        from scipy.stats import pearsonr
        data = list(zip(*args[0]))
        target_flag = 1 if len(args) == 2 else 0
        target = args[1] if target_flag else None
        json = {
            "Y_target": fields[1],
            "X_fields": fields[0],
            "labels": labels,
            "bar": results["scores"],
            "general_info": {
                "Selected Features": "%s out of %s" % (
                    len(list(filter(lambda x: x is True, labels))),
                    len(fields[0])),
                "Selected Fields": " ".join(
                    str(el) for el in list(compress(fields[0], labels))),
                "Number of NaN": len(nan_index)
            },
            "scatter": {
                "y_domain": target,
                "x_domain": data,
                "pearsonr": [pearsonr(el, target)[0] if target_flag else None
                             for el in data],
                # MIC scores (minepy) are disabled; placeholders instead
                "mic": [None for el in data]
            },
            "category": toolkit_obj.category
        }
    # numeric transformation
    elif toolkit_obj.category == 2:
        # find the first row index that is not NaN
        inn = 0
        while inn in nan_index:
            inn = inn + 1
        # returned data formats are inconsistent; check whether the data
        # is 2-D (yes => 1, no => 0)
        flag_shape = 1 if isinstance(labels[inn], list) else 0
        result_be = labels if flag_shape else np.array(labels).reshape(
            [-1, 1]).tolist()
        data = list(zip(*args[0]))
        result = list(zip(*result_be))
        if len(result) == len(fields[0]):
            lab_fields = [str(fields[0][i]) + "_New_Col"
                          for i in range(len(result))]
        else:
            lab_fields = [str(fields[0][0]) + "_New_Col_" + str(i)
                          for i in range(len(result))]
        flag_str1 = isinstance(args[0][inn][0], str)
        flag_str2 = isinstance(result_be[inn][0], str)
        bar1 = []
        bar2 = []
        for el in fields[0]:
            indx = fields[0].index(el)
            raw_d = data[indx]
            if not flag_str1 and len(set(raw_d)) > 5:
                bar1_tmp = visualization_service.freq_hist(raw_d)
            else:
                seta = set(raw_d)
                x_domain = [el for el in seta]
                y_domain = [raw_d.count(el) for el in seta]
                bar1_tmp = {'x_domain': x_domain, 'y_domain': y_domain}
            bar1_tmp.update({"field": el, "title": "数据分布直方图(栏位转换前)"})
            bar1.append(bar1_tmp)
        for el in lab_fields:
            indx = lab_fields.index(el)
            raw_re = result[indx]
            if not flag_str2 and len(set(raw_re)) > 5:
                bar2_tmp = visualization_service.freq_hist(raw_re)
            else:
                seta = set(raw_re)
                x_domain = [el for el in seta]
                y_domain = [raw_re.count(el) for el in seta]
                bar2_tmp = {'x_domain': x_domain, 'y_domain': y_domain}
            bar2_tmp.update({"field": el, "title": "数据分布直方图(栏位转换后)"})
            bar2.append(bar2_tmp)
        json = {
            "category": toolkit_obj.category,
            "table1": {
                "title": "原始数据",
                "field": fields[0],
                "data": [dict(zip(fields[0], arr)) for arr in args[0]]
            },
            "table2": {
                "title": "转换后数据",
                "field": lab_fields,
                "data": [dict(zip(lab_fields, arr)) for arr in result_be]
            },
            "bar1": bar1,
            "bar2": bar2
        }
    # dimensionality reduction
    elif toolkit_obj.category == 3:
        flag = toolkit_obj.parameter_spec["data"]["type"][
                   "key"] == "transfer_box"
        data = list(zip(*args[0]))
        if flag:
            data.append(args[1])
        lab = list(zip(*labels))
        lab_fields = ["New Col" + str(i) for i in range(len(lab))]
        var1 = [np.var(da) for da in data]
        var2 = [np.var(da) for da in lab]
        merge_fields = fields[0] + fields[1] if fields[1] else \
            fields[0]
        x_domain = merge_fields + ["_empty"] + lab_fields
        y_domain = var1 + [0] + var2
        temp = var1[:-1] if flag else var1
        json = {
            "table1": {
                "X_fields": fields[0],
                "Y_fields": fields[1],
                "data": [dict(zip(merge_fields, arr))
                         for arr in list(zip(*data))]
            },
            "table2": {
                "data": [dict(zip(lab_fields, arr)) for arr in labels],
                "fields": lab_fields
            },
            "bar": {
                "x_domain": x_domain,
                "y_domain": y_domain
            },
            "pie1": [{"name": fields[0][i], "value": temp[i]}
                     for i in range(len(temp))],
            "pie2": [{"name": lab_fields[i], "value": var2[i]}
                     for i in range(len(var2))],
            "general_info": gen_info,
            "category": toolkit_obj.category
        }
    else:
        json = {}

    # update the job
    job_business.end_job(job_obj)
    if result_spec["if_reserved"]:
        # create result sds for the toolkit
        sds_name = '%s_%s_result' % (toolkit_obj['name'], job_obj['id'])
        result_sds_obj = staging_data_set_business.add(sds_name, 'des',
                                                       project_obj,
                                                       job=job_obj,
                                                       type='result')
        logger_service.save_result(
            result_sds_obj,
            **{"result": json_utility.convert_to_json(results)})
        logger_service.save_result(result_sds_obj,
                                   **{"visualization": json})
        return {
            "visual_sds_id": str(result_sds_obj.id) if json else None,
            "result": results
        }
    return {"result": results}
def list_by_project_id(project_id):
    project = project_business.get_by_id(project_id)
    return job_business.get_by_project(project)
def keras_seq(conf, input, **kw):
    """
    a general implementation of a Keras sequential model
    :param conf: config dict
    :param input: input data dict
    :param kw: extra keyword args (result_sds, project_id, job_id,
               result_dir)
    :return: dict with evaluation score and training history
    """
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    print('conf')
    print(conf)
    result_dir = kw.pop('result_dir', None)
    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    if project_id is None:
        raise RuntimeError('no project id passed to model')
    with graph.as_default():
        model = Sequential()
        ls = conf['layers']
        comp = conf['compile']
        f = conf['fit']
        e = conf['evaluate']
        x_train = input['x_tr']
        y_train = input['y_tr']
        # validation and test sets both come from the 'x_te'/'y_te' split
        x_val = input['x_te']
        y_val = input['y_te']
        x_test = input['x_te']
        y_test = input['y_te']
        training_logger = logger_service.TrainingLogger(f['args']['epochs'],
                                                        project_id,
                                                        job_id,
                                                        user_ID,
                                                        result_sds)
        # TODO add validator
        # loop to add layers
        for l in ls:
            # get layer class from keras
            layer_class = getattr(layers, l['name'])
            # add layer
            model.add(layer_class(**l['args']))
        # compile: loss, optimizer and metrics all come from conf
        model.compile(**comp['args'])
        # callback to save metrics
        batch_print_callback = LambdaCallback(
            on_epoch_begin=lambda epoch, logs:
            training_logger.log_epoch_begin(epoch, logs),
            on_epoch_end=lambda epoch, logs:
            training_logger.log_epoch_end(epoch, logs),
            on_batch_end=lambda batch, logs:
            training_logger.log_batch_end(batch, logs))
        # checkpoint to save best weights
        best_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'best.hdf5')),
            save_weights_only=True,
            verbose=1, save_best_only=True)
        # checkpoint to save latest weights
        general_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'latest.hdf5')),
            save_weights_only=True,
            verbose=1)
        # training
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            callbacks=[batch_print_callback,
                                       best_checkpoint,
                                       general_checkpoint],
                            verbose=0,
                            **f['args'])
        # testing
        score = model.evaluate(x_test, y_test, **e['args'])
        config = model.get_config()
        logger_service.log_train_end(result_sds,
                                     model_config=config,
                                     score=score,
                                     history=history.history)
        keras_saved_model.save_model(result_dir, model)
        return {'score': score, 'history': history.history}
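# Illustration only: a minimal `conf` that keras_seq can consume. The four
# section names and the 'name'/'args' layer entries match the lookups above;
# the concrete layer sizes and compile/fit arguments are a hypothetical
# example (standard Keras arguments), not a prescribed configuration.
example_keras_seq_conf = {
    'layers': [
        {'name': 'Dense', 'args': {'units': 64, 'activation': 'relu',
                                   'input_shape': (10,)}},
        {'name': 'Dense', 'args': {'units': 1}},
    ],
    'compile': {'args': {'loss': 'mse', 'optimizer': 'rmsprop',
                         'metrics': ['mae']}},
    'fit': {'args': {'epochs': 10, 'batch_size': 32}},
    'evaluate': {'args': {'batch_size': 32}},
}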
def model_to_code(conf, project_id, data_source_id, model_id, job_obj,
                  **kwargs):
    """
    generate runnable code for a model by model_id and the parameter config
    :param conf:
    :param project_id:
    :param data_source_id:
    :param model_id:
    :param job_obj:
    :param kwargs:
    :return:
    """
    file_id = kwargs.get('file_id')
    staging_data_set_obj = None
    if data_source_id:
        staging_data_set_obj = \
            staging_data_set_business.get_by_id(data_source_id)
    project_obj = project_business.get_by_id(project_id)
    file_dict = {'file': ObjectId(file_id)} if file_id else {}
    model_obj = model_business.get_by_model_id(model_id)
    run_args = {
        "conf": conf,
        "project_id": project_id,
        "data_source_id": data_source_id,
        "model_id": model_id,
        "kwargs": kwargs
    }
    job_obj = job_business.update_job_by_id(
        job_obj.id, model=model_obj, staging_data_set=staging_data_set_obj,
        project=project_obj, params=conf, run_args=run_args, status=100)
    job_id = str(job_obj.id)
    f = getattr(models, model_obj.to_code_function)
    if model_obj['category'] == 0:
        # keras nn
        head_str = manage_supervised_input_to_str(conf, data_source_id,
                                                  **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model_obj, f, job_id, head_str)
    elif model_obj['category'] == ModelType['unstructured']:
        # input from folder
        head_str = manage_unstructured_to_str(conf, data_source_id,
                                              **kwargs)
        return job_service.run_code(conf, project_id, None, model_obj, f,
                                    job_id, head_str,
                                    file_id=data_source_id)
    elif model_obj['category'] == ModelType['advanced']:
        # no input
        return job_service.run_code(conf, project_id, None, model_obj, f,
                                    job_id, '', file_id=None)
    else:
        # custom models: build the import/input header that is injected
        # before the generated code
        head_str = ''
        head_str += 'import logging\n'
        head_str += 'import numpy as np\n'
        head_str += 'import pandas as pd\n'
        head_str += 'import tensorflow as tf\n'
        head_str += 'from tensorflow.python.framework import constant_op\n'
        head_str += 'from tensorflow.python.framework import dtypes\n'
        head_str += 'from tensorflow.contrib.learn.python.learn ' \
                    'import metric_spec\n'
        head_str += 'from server3.lib import models\n'
        head_str += 'from server3.lib.models.modified_tf_file.monitors ' \
                    'import ValidationMonitor\n'
        head_str += 'from server3.business import staging_data_set_business\n'
        head_str += 'from server3.business import staging_data_business\n'
        head_str += 'from server3.service import staging_data_service\n'
        head_str += 'from server3.service import job_service\n'
        head_str += 'from server3.service.model_service import ' \
                    'split_categorical_and_continuous\n'
        head_str += 'from server3.service.custom_log_handler ' \
                    'import MetricsHandler\n'
        head_str += 'model_fn = models.%s\n' % model_obj.entry_function
        head_str += "data_source_id = '%s'\n" % data_source_id
        head_str += "model_name = '%s'\n" % model_obj.name
        head_str += "kwargs = %s\n" % kwargs
        fit = conf.get('fit', None)
        if model_obj['category'] == 1:
            data_fields = fit.get('data_fields', [[], []])
            head_str += 'data_fields = %s\n' % data_fields
            head_str += inspect.getsource(
                model_input_manager_custom_supervised)
            head_str += "input_dict = model_input_manager_custom_supervised(" \
                        "data_fields, data_source_id, model_name, **kwargs)\n"
        elif model_obj['category'] == 2:
            x_cols = fit.get('data_fields', [])
            head_str += "x_cols = %s\n" % x_cols
            head_str += inspect.getsource(model_input_manager_unsupervised)
            head_str += "input_dict = model_input_manager_unsupervised(" \
                        "x_cols, data_source_id, model_name)\n"
        return job_service.run_code(conf, project_id, data_source_id,
                                    model_obj, f, job_id, head_str)
def run_model(conf, project_id, data_source_id, model_id, job_id, **kwargs):
    """
    run model by model_id and the parameter config
    :param conf:
    :param project_id:
    :param data_source_id:
    :param model_id:
    :param job_id:
    :param kwargs:
    :return:
    """
    model = model_business.get_by_model_id(model_id)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    result_dir = os.path.join(user_directory, ownership.user.user_ID,
                              project.name, job_id)
    # import model function
    if model['category'] == ModelType['neural_network']:
        # keras nn
        f = getattr(models, model.entry_function)
        input_dict = manage_nn_input(conf, data_source_id, **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['unstructured']:
        # input from folder
        f = getattr(models, model.entry_function)
        input_dict = model_input_manager_unstructured(conf, data_source_id,
                                                      **kwargs)
        return job_service.run_code(conf, project_id, None, model, f,
                                    job_id, input_dict,
                                    file_id=data_source_id,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['hyperopt']:
        f = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        data_fields = fit.get('data_fields', [[], []])
        input_dict = model_input_manager_custom_supervised(
            data_fields, data_source_id, model.name, **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    else:
        # custom models
        f = models.custom_model
        model_fn = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        if model['category'] == ModelType['custom_supervised']:
            data_fields = fit.get('data_fields', [[], []])
            input_dict = model_input_manager_custom_supervised(
                data_fields, data_source_id, model.name, **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn,
                                        input_dict, result_dir=result_dir)
        if model['category'] == ModelType['unsupervised']:
            x_cols = fit.get('data_fields', [])
            input_dict = model_input_manager_unsupervised(x_cols,
                                                          data_source_id,
                                                          model.name,
                                                          **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn,
                                        input_dict, result_dir=result_dir)
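# Illustration only: a reading aid for the dispatch above, keyed by
# model['category']. The ModelType names and input managers are taken
# directly from the branches; this summary adds nothing new.
#   ModelType['neural_network']    -> manage_nn_input
#   ModelType['unstructured']      -> model_input_manager_unstructured
#   ModelType['hyperopt']          -> model_input_manager_custom_supervised
#   ModelType['custom_supervised'] -> model_input_manager_custom_supervised
#   ModelType['unsupervised']      -> model_input_manager_unsupervised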
def start_project_playground(project_id):
    # generate the project volume path
    project = project_business.get_by_id(project_id)
    user_ID = ownership_business.get_owner(project, 'project').user_ID
    volume_dir = os.path.join(USER_DIR, user_ID, project.name, 'volume/')
    if not os.path.exists(volume_dir):
        os.makedirs(volume_dir)
    abs_volume_dir = os.path.abspath(volume_dir)
    deploy_name = project_id + '-jupyter'
    port = port_for.select_random(ports=set(range(30000, 32767)))
    kube_json = {
        "apiVersion": "apps/v1beta1",
        "kind": "Deployment",
        "metadata": {
            "name": deploy_name
        },
        "spec": {
            "template": {
                "metadata": {
                    "labels": {
                        "app": project_id
                    }
                },
                "spec": {
                    "containers": [
                        {
                            "name": project_id,
                            "image": "10.52.14.192/gzyw/jupyter_app",
                            "imagePullPolicy": "IfNotPresent",
                            "ports": [{
                                "containerPort": 8888,
                            }],
                            "stdin": True,
                            "command": ['python'],
                            "args": [
                                "-m", "notebook", "--no-browser",
                                "--allow-root", "--ip=0.0.0.0",
                                "--NotebookApp.allow_origin=*",
                                "--NotebookApp.disable_check_xsrf=True",
                                "--NotebookApp.token=''",
                                "--NotebookApp.iopub_data_rate_limit=10000000000"
                            ],
                            "volumeMounts": [{
                                "mountPath": "/home/root/work/volume",
                                "name": project_id + "-volume"
                            }]
                        }
                    ],
                    "volumes": [{
                        "name": project_id + "-volume",
                        "hostPath": {"path": abs_volume_dir},
                    }]
                },
            },
        }
    }
    service_json = {
        "kind": "Service",
        "apiVersion": "v1",
        "metadata": {
            "name": "my-" + project_id + "-service"
        },
        "spec": {
            "type": "NodePort",
            "ports": [
                {
                    "port": 8888,
                    "nodePort": port
                }
            ],
            "selector": {
                "app": project_id
            }
        }
    }
    api = kube_service.deployment_api
    s_api = kube_service.service_api
    api.create_namespaced_deployment(body=kube_json, namespace=NAMESPACE)
    replicas = api.read_namespaced_deployment_status(
        deploy_name, NAMESPACE).status.available_replicas
    # wait until the deployment is available
    import time
    while replicas is None or replicas < 1:
        replicas = api.read_namespaced_deployment_status(
            deploy_name, NAMESPACE).status.available_replicas
        # FIXME one second sleep to wait for container ready
        time.sleep(1)
    s_api.create_namespaced_service(body=service_json, namespace=NAMESPACE)
    time.sleep(1)
    return port
def first_deploy(user_ID, job_id, name, description, input_info,
                 output_info, examples, server, input_type, model_name,
                 projectId, is_private, **optional):
    """
    :param user_ID:
    :param job_id:
    :param name:
    :param description:
    :param input_info:
    :param output_info:
    :param examples:
    :param server:
    :param input_type:
    :param model_name:
    :param projectId:
    :param is_private:
    :param optional:
    :return:
    """
    job = job_business.get_by_job_id(job_id)
    job_info = job.to_mongo()
    project = project_business.get_by_id(projectId)
    related_fields = project.related_fields
    related_tasks = project.related_tasks
    tags = project.tags
    # if not deployed yet, do the deployment
    try:
        served_model_business.get_by_job(job)
    except DoesNotExist:
        model_type = job.model.category
        if model_type == ModelType['neural_network'] \
                or model_type == ModelType['unstructured']:
            export_path, version = model_service.export(job_id, user_ID)
        else:
            result_sds = staging_data_set_business.get_by_job_id(job_id)
            saved_model_path_array = result_sds.saved_model_path.split('/')
            version = saved_model_path_array.pop()
            export_path = '/'.join(saved_model_path_array)
        deploy_name = job_id + '-serving'
        service_name = "my-" + job_id + "-service"
        port = port_for.select_random(ports=set(range(30000, 32767)))
        export_path = "/home/root/work/user_directory" + \
                      export_path.split("/user_directory", 1)[1]
        kube_json = {
            "apiVersion": "apps/v1beta1",
            "kind": "Deployment",
            "metadata": {
                "name": deploy_name
            },
            "spec": {
                "template": {
                    "metadata": {
                        "labels": {
                            "app": job_id
                        }
                    },
                    "spec": {
                        "containers": [
                            {
                                "name": job_id,
                                "image": "10.52.14.192/gzyw/serving_app",
                                "imagePullPolicy": "IfNotPresent",
                                "ports": [{
                                    "containerPort": 9000,
                                }],
                                "stdin": True,
                                "command": ['tensorflow_model_server'],
                                "args": [
                                    '--enable_batching',
                                    '--port={port}'.format(
                                        port=SERVING_PORT),
                                    '--model_name={name}'.format(
                                        name=model_name),
                                    '--model_base_path={export_path}'.format(
                                        export_path=export_path)
                                ],
                                "volumeMounts": [
                                    {
                                        "mountPath":
                                            "/home/root/work/user_directory",
                                        "name": "nfsvol"
                                    },
                                ]
                            }
                        ],
                        "volumes": [
                            {
                                "name": "nfsvol",
                                "persistentVolumeClaim": {
                                    "claimName": "nfs-pvc"
                                }
                            },
                        ]
                    },
                },
            }
        }
        service_json = {
            "kind": "Service",
            "apiVersion": "v1",
            "metadata": {
                "name": service_name
            },
            "spec": {
                "type": "NodePort",
                "ports": [
                    {
                        "port": 9000,
                        "nodePort": port
                    }
                ],
                "selector": {
                    "app": job_id
                }
            }
        }
        api = kube_service.deployment_api
        s_api = kube_service.service_api
        resp = api.create_namespaced_deployment(body=kube_json,
                                                namespace=NAMESPACE)
        s_api.create_namespaced_service(body=service_json,
                                        namespace=NAMESPACE)
        # add a served model entity
        server = server.replace('9000', str(port))
        data_fields = job_info['params']['fit']['data_fields']
        job_info['staging_data_set'] = job['staging_data_set'][
            'name'] if job['staging_data_set'] else None
        job_info['staging_data_set_id'] = job['staging_data_set'][
            'id'] if job['staging_data_set'] else None
        staging_data_demo = \
            staging_data_service.get_first_one_by_staging_data_set_id(
                job_info['staging_data_set_id'])
        one_input_data_demo = []
        for each_feature in job_info['params']['fit']['data_fields'][0]:
            one_input_data_demo.append(staging_data_demo[each_feature])
        input_data_demo_string = '[' + ",".join(
            str(x) for x in one_input_data_demo) + ']'
        input_data_demo_string = '[' + input_data_demo_string + ',' + \
                                 input_data_demo_string + ']'
        return first_save_to_db(user_ID, name, description, input_info,
                                output_info, examples, version, deploy_name,
                                server, input_type, export_path, job,
                                job_id, model_name, related_fields,
                                related_tasks, tags, is_private,
                                data_fields, input_data_demo_string,
                                service_name, projectId, **optional)
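# Illustration only: how the demo input string built above looks for a model
# whose first data_fields entry is ['f1', 'f2'] (hypothetical field names)
# with one sample row {'f1': 0.1, 'f2': 0.2}:
#   one_input_data_demo    -> [0.1, 0.2]
#   input_data_demo_string -> '[[0.1,0.2],[0.1,0.2]]'
# i.e. the single demo row is duplicated to form a two-sample batch.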
def fork(project_id, new_user_ID):
    """
    fork project
    :param project_id:
    :param new_user_ID:
    :return:
    """
    # get project
    project = project_business.get_by_id(project_id)
    # get ownership, and check privacy
    ownership = ownership_business.get_ownership_by_owned_item(
        project, 'project')
    if ownership.private is True:
        raise NameError('forked project is private, fork failed')
    if ownership.user.user_ID == new_user_ID:
        raise NameError('you cannot fork your own project')
    # get user object
    user = UserBusiness.get_by_user_ID(new_user_ID)
    # copy and save project
    project_cp = project_business.copy(project)
    # create ownership relation
    ownership_business.add(user, True, project=project_cp)
    # copy staging data sets
    sds_array = staging_data_set_business.get_by_project_id(project_id,
                                                            False)
    for sds in sds_array:
        staging_data_service.copy_staging_data_set(sds, project_cp)
    # copy jobs and save
    jobs = project.jobs
    jobs_cp = []
    for job in jobs:
        # get source sds
        if hasattr(job, 'staging_data_set') and job.staging_data_set:
            sds_cp = staging_data_set_business.get_by_name_and_project(
                job.staging_data_set.name,
                job.staging_data_set.project)
        else:
            sds_cp = None
        # copy job
        job_cp = job_business.copy_job(job, project_cp, sds_cp)
        if not job_cp:
            continue
        jobs_cp.append(job_cp)
        # copy result staging data set by job and bind to project
        try:
            # get result sds
            result_sds = staging_data_set_business.get_by_job_id(job['id'])
            # bind job to sds
            staging_data_set_business.update_job_by_name_and_project(
                result_sds.name, result_sds.project, job_cp)
        except DoesNotExist:
            pass
    project_business.update_by_id(project_cp['id'], jobs=jobs_cp)
    project_cp.reload()
    return project_cp
def add_staging_data_set_by_data_set_id(sds_name, sds_description,
                                        project_id, data_set_id):
    """
    Create staging_data_set and copy to staging_data by original data_set id
    :param sds_name: str
    :param sds_description: str
    :param project_id: ObjectId
    :param data_set_id: ObjectId
    :return: new staging_data_set object
    """
    # create new staging data set
    ds_obj = data_set_business.get_by_id(data_set_id)
    ds = ds_obj.to_mongo()
    ds.pop('name')
    ds.pop('description')
    sds = staging_data_set_business.add(sds_name, sds_description,
                                        project_id, **ds)
    # update project info
    # note: related_field in data set becomes related_fields here
    project_business.update_items_to_list_field(
        project_id,
        tags=ds.get('tags', []),
        related_tasks=ds.get('related_tasks', []),
        related_fields=ds.get('related_field', []))
    # generate the project volume path
    project = project_business.get_by_id(project_id)
    user_ID = ownership_business.get_owner(project, 'project').user_ID
    volume_dir = os.path.join(USER_DIR, user_ID, project.name, 'volume/')
    if not os.path.exists(volume_dir):
        os.makedirs(volume_dir)
    # copy data from data(raw) to staging data
    try:
        # copy the file instance to the project volume
        if hasattr(ds_obj, 'file') and ds_obj.file:
            file = ds_obj.file
            if os.path.isdir(file.uri):
                dst = os.path.join(volume_dir, os.path.dirname(file.uri))
                # if dst exists, remove it first, because copytree
                # creates the destination dir itself
                if os.path.exists(dst):
                    shutil.rmtree(dst)
                shutil.copytree(file.uri, dst)
            else:
                shutil.copy(file.uri, volume_dir)
        # get all data objects by data_set id
        data_objects = data_business.get_by_data_set(data_set_id)
        # convert mongoengine objects to dicts
        data_objects = json_utility.me_obj_list_to_dict_list(data_objects)
        # remove data set id when importing into sds
        for d in data_objects:
            d.pop('data_set')
        if data_objects:
            staging_data_business.add_many(sds, data_objects)
        return sds
    except Exception as e:
        # on failure, clean up staging_data_set and staging_data
        staging_data_business.remove_by_staging_data_set_id(sds.id)
        staging_data_set_business.remove_by_id(sds.id)
        raise e