def host_optimal_binning(self, data_instances, host_idx, encrypted_bin_info, result_counts, category_names):
    """Run optimal binning for one host party and return the fitted binning object.

    The settings requested by the host are read from
    ``encrypted_bin_info['optimal_params']`` and applied to a deep copy of
    ``self.model_param``, so the shared parameter object is not modified.

    Args:
        data_instances: Table providing ``count()`` and ``partitions``; it is
            also handed to ``self.get_histogram``.
        host_idx: Index of the host party, forwarded to
            ``self.optimal_binning_sync``.
        encrypted_bin_info: Mapping with an ``'optimal_params'`` entry that
            holds ``bin_num``, ``metric_method``, ``mixture``,
            ``max_bin_pct`` and ``min_bin_pct``.
        result_counts: Object whose ``collect()`` yields ``(key, value)``
            pairs; keys found in ``category_names`` are skipped.
        category_names: Container of keys excluded from optimal binning.

    Returns:
        The value returned by ``self.optimal_binning_sync``.
    """
    optimal_params = encrypted_bin_info['optimal_params']

    # Work on a private copy so the overrides below do not leak into
    # self.model_param.
    host_model_params = copy.deepcopy(self.model_param)
    host_model_params.bin_num = optimal_params.get('bin_num')
    optimal_cfg = host_model_params.optimal_binning_param
    for field in ('metric_method', 'mixture', 'max_bin_pct', 'min_bin_pct'):
        setattr(optimal_cfg, field, optimal_params.get(field))

    # NOTE(review): optimal_binning_sync builds its own OptimalBinning and
    # seeds event/non-event totals from self.binning_obj, so the totals
    # returned here are no longer consumed. The call is kept in case
    # get_histogram has side effects -- confirm and remove if it does not.
    self.get_histogram(data_instances)

    optimal_binning_cols = {
        column: counts
        for column, counts in dict(result_counts.collect()).items()
        if column not in category_names
    }

    # Bind by keyword: the previous positional call did not line up with
    # optimal_binning_sync(result_counts, sample_count, partitions,
    # host_idx, host_model_params) and shifted every argument by one slot.
    return self.optimal_binning_sync(
        result_counts=optimal_binning_cols,
        sample_count=data_instances.count(),
        partitions=data_instances.partitions,
        host_idx=host_idx,
        host_model_params=host_model_params,
    )
def optimal_binning_sync(self, result_counts, sample_count, partitions, host_idx, host_model_params):
    """Fit an optimal binning for a host and send it the resulting split points.

    Args:
        result_counts: Passed to ``bin_sum_to_bucket_list`` together with
            ``partitions`` to build the bucket table.
        sample_count: Passed to ``fit_buckets`` along with the bucket table.
        partitions: Second argument to ``bin_sum_to_bucket_list``.
        host_idx: Index of the host that receives the split points.
        host_model_params: Parameters used to construct the ``OptimalBinning``.

    Returns:
        The ``OptimalBinning`` instance after ``fit_buckets`` has been called.
    """
    binning = OptimalBinning(
        params=host_model_params,
        abnormal_list=self.binning_obj.abnormal_list,
    )
    # Seed the event / non-event totals from this object's own binning state.
    for total_name in ("event_total", "non_event_total"):
        setattr(binning, total_name, getattr(self.binning_obj, total_name))

    LOGGER.debug("Start host party optimal binning train")
    buckets = binning.bin_sum_to_bucket_list(result_counts, partitions)
    binning.fit_buckets(buckets, sample_count)

    # Send the fitted split points to the host identified by host_idx.
    split_points = binning.bin_results.all_split_points
    self.transfer_variable.bucket_idx.remote(
        split_points,
        role=consts.HOST,
        idx=host_idx,
    )
    return binning