def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example using only the manual filter.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    # Nothing is filtered out explicitly; the columns to keep are given both
    # by index and by name.
    manual_filter = {
        "filter_out_indexes": None,
        "filter_out_names": None,
        "left_col_indexes": [0, 1, 2],
        "left_col_names": ["x3"],
    }
    selection_conf = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["manually"],
        "manually_param": manual_filter,
    }

    pipeline = common_tools.make_normal_dsl(config, namespace, selection_conf)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run the single-predict-pipeline hetero feature selection example.

    A quantile binning configuration and a selection configuration are
    handed to ``common_tools.make_single_predict_pipeline``; the resulting
    pipeline is fitted and the selection component's summary is passed to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to the pipeline builder.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    binning_conf = dict(
        name="hetero_feature_binning_0",
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        bin_names=None,
        category_indexes=None,
        category_names=None,
        adjustment_factor=0.5,
        local_only=False,
        transform_param=dict(
            transform_cols=-1,
            transform_names=None,
            transform_type="bin_num",
        ),
    )

    # Parameter sections are supplied for more filters than are listed in
    # "filter_methods"; they are passed along as-is.
    selection_conf = dict(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["manually", "iv_value_thres", "iv_percentile"],
        manually_param=dict(filter_out_indexes=[], filter_out_names=[]),
        unique_param=dict(eps=1e-06),
        iv_value_param=dict(value_threshold=0.1),
        iv_percentile_param=dict(percentile_threshold=0.9),
        variance_coe_param=dict(value_threshold=0.3),
        outlier_param=dict(percentile=0.95, upper_threshold=2.0),
    )

    pipeline = common_tools.make_single_predict_pipeline(
        config,
        namespace,
        selection_conf,
        binning_param=binning_conf,
    )
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Build the pipeline via ``make_normal_dsl``, fit it, and report summaries.

    After fitting, the summaries of the ``hetero_feature_selection_0`` and
    ``evaluation_0`` components are passed, in that order, to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object.
        namespace: Passed through unchanged to ``make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)

    pipeline = make_normal_dsl(config, namespace)
    pipeline.fit()

    for component_name in ("hetero_feature_selection_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        common_tools.prettify(summary)
def main(config="../../config.yaml", namespace=""):
    """Build the pipeline via ``make_normal_dsl``, fit it with job parameters.

    After fitting, the summaries of the ``hetero_feature_selection_0`` and
    ``evaluation_0`` components are passed, in that order, to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to ``make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    pipeline = make_normal_dsl(config, namespace)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    for component_name in ("hetero_feature_selection_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        common_tools.prettify(summary)
def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example driven by a fast SBT filter.

    A fast secureboost configuration is supplied to
    ``common_tools.make_normal_dsl`` together with a selection configuration
    whose only filter method is ``hetero_fast_sbt_filter``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    # NOTE: the "work_mode" key below belongs to the boosting configuration
    # and is unrelated to the job-level ``work_mode`` read from ``config``.
    fast_sbt_conf = dict(
        name="fast_secureboost_0",
        task_type="classification",
        learning_rate=0.1,
        num_trees=4,
        subsample_feature_rate=1,
        n_iter_no_change=False,
        work_mode="layered",
        guest_depth=2,
        host_depth=3,
        tol=0.0001,
        bin_num=50,
        metrics=["Recall", "ks", "auc", "roc"],
        objective_param=dict(objective="cross_entropy"),
        encrypt_param=dict(method="iterativeAffine"),
        predict_param=dict(threshold=0.5),
        validation_freqs=1,
    )

    selection_conf = dict(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["hetero_fast_sbt_filter"],
        sbt_param=dict(
            metrics="feature_importance",
            filter_type="threshold",
            take_high=True,
            threshold=0.03,
        ),
    )

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        selection_conf,
        fast_sbt_param=fast_sbt_conf,
    )
    pipeline.fit(backend=backend, work_mode=work_mode)

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example with IV-based filters.

    A quantile binning configuration and a selection configuration carrying
    ``local_only`` flags are handed to ``common_tools.make_normal_dsl``; the
    pipeline is fitted and the selection summary is passed to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    transform_conf = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    binning_conf = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_conf,
    }

    # NOTE(review): "iv_filter" is listed as a filter method, but the
    # parameter sections supplied here are "iv_value_param" and
    # "iv_top_k_param" — confirm the intended filter name with the owner.
    selection_conf = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_value_thres", "iv_filter"],
        "iv_value_param": {"value_threshold": 1, "local_only": True},
        "iv_top_k_param": {"k": 7, "local_only": False},
    }

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        selection_conf,
        binning_param=binning_conf,
    )
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example with the percentage filter.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)

    selection_conf = dict(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["percentage_value"],
        percentage_value_param=dict(upper_pct=0.8),
    )

    pipeline = common_tools.make_normal_dsl(config, namespace, selection_conf)
    pipeline.fit()

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example combining several filters.

    Binning, statistic, and selection configurations are handed to
    ``common_tools.make_normal_dsl``; the pipeline is fitted without
    arguments and the selection summary is passed to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)

    binning_conf = dict(
        name="hetero_feature_binning_0",
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        bin_names=None,
        category_indexes=None,
        category_names=None,
        adjustment_factor=0.5,
        local_only=False,
        transform_param=dict(
            transform_cols=-1,
            transform_names=None,
            transform_type="bin_num",
        ),
    )

    statistic_conf = dict(
        name="statistic_0",
        statistics=["95%", "coefficient_of_variance", "stddev"],
        column_indexes=-1,
        column_names=[],
    )

    # Filters are listed in the order they appear in the original example.
    enabled_filters = [
        "manually",
        "unique_value",
        "iv_value_thres",
        "coefficient_of_variation_value_thres",
        "iv_percentile",
        "outlier_cols",
    ]
    selection_conf = dict(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=enabled_filters,
        manually_param=dict(filter_out_indexes=[], filter_out_names=[]),
        unique_param=dict(eps=1e-06),
        iv_value_param=dict(value_threshold=0.1),
        iv_percentile_param=dict(percentile_threshold=0.9),
        variance_coe_param=dict(value_threshold=0.3),
        outlier_param=dict(percentile=0.95, upper_threshold=2.0),
    )

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        selection_conf,
        binning_param=binning_conf,
        statistic_param=statistic_conf,
    )
    pipeline.fit()

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())
def main(config="../../config.yaml", namespace=""):
    """Run the hetero feature selection example with iv/statistic/psi/sbt filters.

    Binning, statistic, PSI, and secureboost configurations are handed to
    ``common_tools.make_normal_dsl`` alongside a selection configuration that
    lists four filter methods. The pipeline is fitted with the backend and
    work mode taken from ``config`` and the selection summary is passed to
    ``common_tools.prettify``.

    Args:
        config: Either a path to a job config file (resolved through
            ``load_job_config``) or an already-loaded config object that
            exposes ``backend`` and ``work_mode``.
        namespace: Passed through unchanged to
            ``common_tools.make_normal_dsl``.
    """
    # A string is treated as a path and turned into a config object.
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    # --- upstream component configurations -------------------------------
    binning_conf = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num",
        },
    }
    statistic_conf = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": -1,
        "column_names": [],
    }
    psi_conf = {"name": "psi_0", "max_bin_num": 20}
    secureboost_conf = {
        "name": "secureboost_0",
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 5,
        "subsample_feature_rate": 1,
        "n_iter_no_change": False,
        "tol": 0.0001,
        "bin_num": 50,
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "paillier"},
        "predict_param": {"threshold": 0.5},
        "validation_freqs": 1,
    }

    # --- per-filter settings for the selection component -----------------
    iv_filter_conf = {
        "metrics": ["iv", "iv", "iv"],
        "filter_type": ["threshold", "top_k", "top_percentile"],
        "take_high": True,
        "threshold": [0.03, 15, 0.7],
        "host_thresholds": [[0.15], None, None],
        "select_federated": True,
    }
    statistic_filter_conf = {
        "metrics": ["skewness", "skewness", "kurtosis", "median"],
        "filter_type": "threshold",
        "take_high": [True, False, True, True],
        "threshold": [-10, 10, -1.5, -1.5],
    }
    psi_filter_conf = {
        "metrics": "psi",
        "filter_type": "threshold",
        "take_high": False,
        "threshold": -0.1,
    }
    sbt_filter_conf = {
        "metrics": "feature_importance",
        "filter_type": "threshold",
        "take_high": True,
        "threshold": 0.03,
    }
    selection_conf = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": [
            "iv_filter",
            "statistic_filter",
            "psi_filter",
            "hetero_sbt_filter",
        ],
        "iv_param": iv_filter_conf,
        "statistic_param": statistic_filter_conf,
        "psi_param": psi_filter_conf,
        "sbt_param": sbt_filter_conf,
    }

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        selection_conf,
        binning_param=binning_conf,
        statistic_param=statistic_conf,
        psi_param=psi_conf,
        sbt_param=secureboost_conf,
    )
    pipeline.fit(backend=backend, work_mode=work_mode)

    selection_component = pipeline.get_component("hetero_feature_selection_0")
    common_tools.prettify(selection_component.get_summary())