def get_histogram(data_instances):
    """
    Count the instances labelled 1 (events) and 0 (non-events).

    The label histogram is obtained from a MultivariateStatisticalSummary
    built over ``data_instances``. A label missing from the histogram is
    counted as 0. A warning is logged when either count is zero; the counts
    are returned regardless.

    Returns
    -------
    tuple
        (event_total, non_event_total)
    """
    summary = statics.MultivariateStatisticalSummary(data_instances, cols_index=-1)
    label_counts = summary.get_label_histogram()

    event_total = label_counts.get(1, 0)
    non_event_total = label_counts.get(0, 0)

    # A zero count on either side is only reported, never raised.
    if 0 in (event_total, non_event_total):
        message = (
            f"event_total or non_event_total might have errors, event_total: {event_total},"
            f" non_event_total: {non_event_total}"
        )
        LOGGER.warning(message)

    return event_total, non_event_total
def zeros(self, data_shape, fit_intercept, data_instances):
    """
    Build an all-zero initial array, optionally seeding the last entry
    with a log-odds intercept.

    If fit intercept, use the following formula to initialize b can get a
    faster converge rate
        b = log(P(1)/P(0))

    The count of label 1 is the numerator; the counts of labels 0 and -1
    are summed to form the denominator.

    Parameters
    ----------
    data_shape :
        Shape passed straight to ``np.zeros``.
    fit_intercept : bool
        Whether the last entry should be initialised from the label ratio.
    data_instances :
        Data used to compute the label histogram. When None, the intercept
        is left at zero.

    Returns
    -------
    numpy.ndarray
        Zero array of ``data_shape``; ``inits[-1]`` holds the log-odds when
        it could be computed.
    """
    inits = np.zeros(data_shape)
    if not fit_intercept or data_instances is None:
        return inits

    static_obj = statics.MultivariateStatisticalSummary(data_instances, cols_index=-1)
    label_histogram = static_obj.get_label_histogram()
    LOGGER.debug("label_histogram is : {}".format(label_histogram))

    # Default the positive count to 0 so a missing label does not yield None.
    one_count = label_histogram.get(1, 0)
    zero_count = label_histogram.get(0, 0) + label_histogram.get(-1, 0)

    # log(one/zero) is undefined or infinite when either class is absent;
    # keep the zero intercept instead of raising or storing -inf.
    if one_count <= 0 or zero_count <= 0:
        LOGGER.warning(
            "Cannot initialize intercept from label ratio, "
            f"one_count: {one_count}, zero_count: {zero_count}; using 0 instead"
        )
        return inits

    inits[-1] = np.log(one_count / zero_count)
    return inits