def get_builtin_data_checkbox():
    """
    @@@
    ### description
    > Obtains the current data type.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    current_dataset = DataSetApi.get_dataset(task_id)
    # A dataset is "inline" when its name appears in the built-in list.
    is_inline = current_dataset in DataSetApi.get_inline_dataset_names()
    return jsonify({'is_inline': is_inline})
def get_edit_customize_dataset_path():
    """
    @@@
    ### description
    > Obtaining a User-Defined Data Path.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    dataset = DataSetApi.get_dataset(task_id)
    # Built-in datasets have no user-defined path to report.
    if dataset in DataSetApi.get_inline_dataset_names():
        return jsonify({})
    return jsonify({'customize_path': dataset})
def set_dataset_info():
    """
    @@@
    ### description
    > Setting Task Data Information.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    |  name_path   |  false   |    body      | str  | task path string         |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    name_path = request.form.get('name_path')
    succeeded = DataSetApi(name_path).set_dataset_info(task_id)
    if succeeded:
        status = 200
        message = _('The dataset is set successfully.')
    else:
        status = 400
        message = _('The dataset fail to be set.')
    return jsonify({
        'status': status,
        'task_id': task_id,
        'data': message
    })
def read_task(file_name):
    """
    Read the configuration file and generate the task.

    Parameters
    ----------
    file_name: str
        profile path.

    Returns
    -------
    task_id: int or None
        task key in the database, or ``None`` when the YAML file
        cannot be parsed.
    """
    # Bug fix: task_id was only assigned inside the try-block, so a
    # yaml.YAMLError used to raise UnboundLocalError at `return task_id`.
    task_id = None
    with open(file_name, 'r') as file:
        try:
            params_config = yaml.safe_load(file)
            if "task_params" in params_config.keys():
                alg = params_config['task_params']['algorithm']
                task_name = params_config['task_params']['task_name']
                task_type = params_config['task_params']['task_type']
            else:
                # Fall back to the file stem (text before the first dot)
                # as both algorithm and task name.
                alg = file_name.split(os.path.sep)[-1].split(".")[0].upper()
                task_name = alg
                task_type = 2
            task_id = TaskApi().add_task(task_type, task_name)
            if "path" in params_config['dataset_params'].keys():
                x_file = params_config['dataset_params']['path']
                dag_file = None
            else:
                x_file = params_config['dataset_params']['x_file']
                dag_file = params_config['dataset_params']['dag_file']
            if x_file is not None:
                dataset = DataSetApi(x_file)
                dataset.set_dataset_info(task_id)
            if dag_file is not None:
                true_dag = read_file(dag_file)
                TaskApi().update_true_dag(task_id, true_dag)
                # Bug fix: str() guards os.path.join against an integer
                # task key (os.path.join rejects non-str components).
                task_path = os.path.join(FILE_PATH, 'task', str(task_id))
                file_name = os.path.join(task_path, "true.txt")
                save_gragh_edges(true_dag, file_name)
            algorithm = AlgorithmApi(
                alg, json.dumps(params_config['algorithm_params']))
            algorithm.set_algorithm_info(task_id)
        except yaml.YAMLError as exc:
            print(exc)
    return task_id
def check_dataset():
    """
    @@@
    ### description
    > Check whether the file path exists.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |     path     |  false   |    body      | str  | file path string         |
    ### request
    ```json
    {"path": xxxx}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    path = request.form.get('path')
    columns = DataSetApi.check_dataset(path)
    # Guard clause: a falsy result means the path failed verification.
    if not columns:
        return jsonify({
            'status': 403,
            'data': _('The verification result is false.')
        })
    return jsonify({"column_num": columns})
def get_inline_dataset_names():
    """
    @@@
    ### description
    > Obtains the built-in data name list.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 2}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    update_inline_datasets()
    payload = {'inline_datasets': INLINE_DATASETS}
    # Include the task's current selection only when one exists.
    selected = DataSetApi.get_dataset(task_id)
    if selected:
        payload['selected_dataset'] = selected
    return jsonify(payload)
def get_causal_relationship():
    """
    @@@
    ### description
    > Obtains the causal relationship of a task.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get("task_id")
    result_data = os.path.join(FILE_PATH, "task", task_id)
    result_dict = dict()
    task_type = TaskApi().get_task_type(task_id)
    if task_type == 1:
        # Data-generation task: a single node-relationship CSV whose first
        # line is a JSON-encoded edge list.
        dataset_path = DataSetApi.get_dataset(task_id)
        task_name = TaskApi().get_task_name(task_id)
        dataset_file = os.path.join(
            dataset_path,
            "node_relationship_" + str(task_id) + "_" + task_name + ".csv")
        with open(dataset_file, "r") as res_file:
            res_list = res_file.readlines()
            result_dict.update({dataset_file: json.loads(res_list[0])})
    elif task_type == 2:
        # Causal-discovery task: collect every result file under the task
        # directory. Bug fix: the throwaway dirnames variable was named
        # `_`, shadowing the module's gettext alias used by sibling views.
        for dir_path, _dirs, file_names in os.walk(result_data):
            for file_name in file_names:
                result_file = os.path.join(dir_path, file_name)
                with open(result_file, "r") as res_file:
                    res_list = res_file.readlines()
                    result_dict.update({file_name: json.loads(res_list[0])})
        if len(result_dict) > 1:
            # Compare the first two edge lists (predicted vs. true graph):
            # common edges, predicted-only edges, and true-only edges.
            pre_list = [tuple(pl) for pl in list(result_dict.values())[0]]
            true_list = [tuple(tl) for tl in list(result_dict.values())[1]]
            result_dict["common"] = list(
                set(pre_list).intersection(set(true_list)))
            result_dict["pre_common"] = list(
                set(pre_list).difference(set(true_list)))
            result_dict["true_common"] = list(
                set(true_list).difference(set(pre_list)))
    return jsonify(result_dict)
def download_file():
    """
    @@@
    ### description
    > Downloading task-related files in the list.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    file_name = None
    task_id = request.form.get("task_id")
    task_api = TaskApi()
    task_type = task_api.get_task_type(task_id)
    if task_type == 1:
        # Data-generation task: bundle the generated data files.
        data_path = DataSetApi.get_dataset(task_id)
        if data_path is None:
            data_path = os.path.join(FILE_PATH, 'inline')
        # Fix: reuse the TaskApi instance created above instead of
        # constructing a second, redundant one.
        task_name = task_api.get_task_name(task_id)
        file_name = zip_data_file(task_id, task_name, data_path)
    elif task_type == 2:
        # Causal-discovery task: bundle the algorithm result files.
        file_name = zip_alg_file(task_id)
    if file_name:
        response = make_response(send_file(file_name))
        # RFC 5987 filename* form so non-ASCII task ids survive download.
        response.headers["Content-Disposition"] = \
            "attachment;" \
            "filename*=UTF-8''{utf_filename}".format(
                utf_filename=(task_id + ".zip"))
        return response
    else:
        return jsonify({
            "status": 400,
            "data": _("The result file does not exist.")
        })
def add_task():
    """
    @@@
    ### description
    > New Task.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    |  task_name   |  false   |    body      | str  | task name                |
    |  task_type   |  false   |    body      | str  | task type                |
    ### request
    ```json
    {"task_id": 1, "task_name": xxxx, "task_type": 2}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    task_name = request.form.get('task_name')
    task_type = int(request.form.get('task_type'))
    if task_id:
        # Re-registering an existing task: keep its dataset if one is set.
        task_id = TaskApi().add_task(task_type, task_name, task_id)
        task_path = DataSetApi.get_dataset(task_id)
        if task_path == "":
            task_path = os.path.join(FILE_PATH, 'inline')
    else:
        # Brand-new task: pick a default dataset location by task type.
        task_id = TaskApi().add_task(task_type, task_name)
        if task_type == 1:
            task_path = os.path.join(FILE_PATH, 'inline')
        else:
            update_inline_datasets()
            task_path = INLINE_DATASETS[0] if INLINE_DATASETS else None
    return jsonify({'task_id': task_id, 'task_path': task_path})
def run_task():
    """
    @@@
    ### description
    > Perform causal discovery or data generation tasks.
    ### args
    |     args     | nullable | request type | type |         remarks          |
    |--------------|----------|--------------|------|--------------------------|
    |   task_id    |  false   |    body      | int  | task key in the database |
    ### request
    ```json
    {"task_id": 1}
    ```
    ### return
    ```json
    {"status": xxxx, "data": xxxx}
    ```
    @@@
    """
    task_id = request.form.get('task_id')
    dataset = DataSetApi.get_dataset(task_id)
    algorithm_info = AlgorithmApi.get_algorithm(task_id)
    # Dispatch table: 1 -> data generation, 2 -> causal discovery.
    handlers = {1: run.run_data, 2: run.run_task}
    task_type = TaskApi().get_task_type(task_id)
    started = None
    if dataset and len(algorithm_info) > 0 and task_type in handlers:
        started = handlers[task_type](task_id, dataset,
                                      algorithm_info['algorithm'],
                                      algorithm_info['parameters'])
    if started:
        status_code, data = 200, _('The task succeeds to begin to run.')
    else:
        status_code, data = 400, _('The task fails to begin to run.')
    return jsonify({'status_code': status_code, 'data': data})
def save_param(task_id):
    """
    Save the exported configuration file.

    Builds a YAML-serializable dict with three sections —
    ``algorithm_params``, ``task_params``, and ``dataset_params`` — and
    writes it to ``<FILE_PATH>/<task_name>.yaml``.

    Parameters
    ----------
    task_id: int
        task key in the database.

    Returns
    -------
    filename: str
        profile path.
    """
    alg = AlgorithmApi.get_algorithm(task_id)
    # Algorithm name/params may be absent; default both to None.
    alg_name = None
    if 'algorithm' in alg.keys():
        alg_name = alg['algorithm']
    alg_params = None
    if 'parameters' in alg.keys():
        alg_params = alg['parameters']
    path = DataSetApi.get_dataset(task_id)
    task_type = TaskApi().get_task_type(task_id)
    task_name = TaskApi().get_task_name(task_id)
    task_data = {
        "algorithm_params": conversion_type(alg_name, alg_params),
        "task_params": {
            "algorithm": alg_name,
            "task_type": task_type,
            "task_name": task_name
        }
    }
    if task_type == 2:
        # Causal-discovery task.
        if path in DataSetApi.get_inline_dataset_names():
            # Inline dataset names appear to encode the originating task id
            # before the first underscore — TODO confirm naming scheme.
            path_task_id = path.split("_")[0]
            sample_path = os.path.join(FILE_PATH,
                                       "sample_" + path_task_id + ".csv")
            true_dag_path = os.path.join(FILE_PATH,
                                         "true_dag_" + path_task_id + ".npz")
            task_data.update({
                "dataset_params": {
                    "path": path,
                    "x_file": sample_path,
                    "dag_file": true_dag_path
                }
            })
            if alg_name == "TTPM":
                # TTPM additionally needs a topology file.
                topo_path = os.path.join(FILE_PATH,
                                         "topo_" + path_task_id + ".npz")
                task_data["dataset_params"].update(
                    {"topology_file": topo_path})
        else:
            # User-supplied dataset: the path itself is the sample file.
            task_data.update(
                {"dataset_params": {
                    "x_file": path,
                    "dag_file": None
                }})
            if alg_name == "TTPM":
                task_data["dataset_params"].update({"topology_file": None})
    else:
        # Data-generation task: file names are keyed by this task's own id.
        # NOTE(review): task_id is documented as int, but it is concatenated
        # with strings here — callers presumably pass a str; verify.
        sample_path = os.path.join(path, "sample_" + task_id + ".csv")
        true_dag_path = os.path.join(path, "true_dag_" + task_id + ".npz")
        task_data.update({
            "dataset_params": {
                "path": path,
                "x_file": sample_path,
                "dag_file": true_dag_path
            }
        })
        # NOTE(review): this branch keys the topology file on "EVENT" while
        # the task_type == 2 branches key on "TTPM" — confirm the asymmetry
        # is intentional.
        if alg_name == "EVENT":
            topo_path = os.path.join(path, "topo_" + task_id + ".npz")
            task_data["dataset_params"].update({"topology_file": topo_path})
    filename = os.path.join(FILE_PATH, task_name + ".yaml")
    with open(filename, 'w') as dumpfile:
        try:
            dumpfile.write(yaml.dump(task_data))
        except yaml.YAMLError as exc:
            print(exc)
    return filename
def run_task(task_id, dataset, algorithm, parameters=None):
    """
    Executing the Causal Discovery Algorithm Task.

    Parameters
    ----------
    task_id: int
        task key in the database.
    dataset: str
        data path.
    algorithm: str
        algorithm name.
    parameters: dict
        algorithm parameters.

    Returns
    -------
    : bool
        True: The causal discovery algorithm task is executed successfully.
        False: The cause-and-effect discovery algorithm task fails to be
        executed.
    """
    X = None
    true_dag = None
    topology_matrix = None
    if dataset in INLINE_DATASETS:
        try:
            # Inline layout: <root>/<sub>/<name>.<ext> with optional
            # companions <root>/true/<name>.npz and <root>/topo_<name>.npz.
            file_path_list = dataset.split(os.path.sep)
            file_name = '.'.join(file_path_list[-1].split(".")[:-1])
            X = read_file(dataset, header=0)
            true_file_name = os.path.join(
                os.path.sep.join(file_path_list[:-2]), "true",
                file_name + ".npz")
            if os.path.exists(true_file_name):
                true_dag = read_file(true_file_name)
            topo_file_name = os.path.join(
                os.path.sep.join(file_path_list[:-2]),
                "topo_" + file_name + ".npz")
            if os.path.exists(topo_file_name):
                topology_matrix = read_file(topo_file_name)
        except OSError as error:
            logger.warning('alg run fail %s' % str(error))
            return False
    elif DataSetApi.check_dataset(dataset):
        if '.xls' in dataset:
            X = pd.read_excel(dataset, dtype=np.float64)
        elif '.csv' in dataset:
            X = pd.read_csv(dataset)
        else:
            return False
    # Bug fix: previously a dataset that was neither inline nor a valid
    # csv/xls path fell through and launched the worker with data=None.
    if X is None:
        return False
    if algorithm not in CHECK_INLINE_ALGORITHMS:
        return False
    # Run the discovery in the background so the request returns at once.
    thread = threading.Thread(target=causal_discovery,
                              kwargs={
                                  "data": X,
                                  "true_dag": true_dag,
                                  "alg": algorithm,
                                  "algorithm_params": parameters,
                                  "task_id": task_id,
                                  'topology_matrix': topology_matrix
                              })
    thread.start()
    return True