def setup_loss_functions(data, coef_set, L0_max=None, loss_computation=None, w_pos=1.0):
    """
    Build the loss-function callables used during risk-SLIM training.

    Picks the fastest applicable loss implementation (weighted / lookup / fast),
    possibly overriding the requested `loss_computation`, and returns both the
    chosen implementation and an exact ("real") counterpart used to verify
    lookup-table approximations.

    Parameters
    ----------
    data : dict
        Must contain 'X' (feature matrix) and 'Y' (labels); may contain
        'sample_weights'. Z = X * Y is the signed feature matrix used by the loss.
    coef_set
        Coefficient bounds; a CoefficientSet enables the lookup-table loss.
    L0_max : int or None
        Optional bound on model size, used to tighten the score range of the
        lookup table.
    loss_computation : {None, 'weighted', 'normal', 'fast', 'lookup'}
        Requested implementation. NOTE(review): this is only used for logging —
        the final choice is driven entirely by the data/coef_set, so a request
        for 'normal' is effectively never honored; confirm this is intended.
    w_pos : float
        Weight applied to positive examples when sample weights are in use.

    Returns
    -------
    tuple
        (Z, compute_loss, compute_loss_cut, compute_loss_from_scores,
         compute_loss_real, compute_loss_cut_real, compute_loss_from_scores_real)
    """
    # todo: check if fast/lookup loss is installed
    assert loss_computation in [None, 'weighted', 'normal', 'fast', 'lookup']

    # lookup tables are only worthwhile on non-integer data when the number of
    # distinct rows of Z is small
    MAX_DISTINCT_XY_VALUES_FOR_LOOKUP_ON_NONINTEGER_DATA = 20

    Z = data['X'] * data['Y']

    # weighted loss is required only when the effective weights are not all 1.0
    if 'sample_weights' in data:
        sample_weights = _setup_training_weights(Y=data['Y'],
                                                 sample_weights=data['sample_weights'],
                                                 w_pos=w_pos)
        use_weighted = not np.all(np.equal(sample_weights, 1.0))
    else:
        use_weighted = False

    integer_data_flag = np.all(Z == np.require(Z, dtype=np.int_))
    distinct_points_flag = len(np.unique(Z, axis=0)) <= MAX_DISTINCT_XY_VALUES_FOR_LOOKUP_ON_NONINTEGER_DATA
    use_lookup_table = isinstance(coef_set, CoefficientSet) and (integer_data_flag or distinct_points_flag)

    # choose the implementation: weighted beats lookup beats fast
    if use_weighted:
        final_loss_computation = 'weighted'
    elif use_lookup_table:
        final_loss_computation = 'lookup'
    else:
        final_loss_computation = 'fast'

    if final_loss_computation != loss_computation:
        print_log("switching loss computation from %s to %s" % (loss_computation, final_loss_computation))

    if final_loss_computation == 'weighted':

        from riskslim.loss_functions.log_loss_weighted import \
            log_loss_value, \
            log_loss_value_and_slope, \
            log_loss_value_from_scores

        Z = np.require(Z, requirements=['C'])
        total_sample_weights = np.sum(sample_weights)

        compute_loss = lambda rho: log_loss_value(Z, sample_weights, total_sample_weights, rho)
        compute_loss_cut = lambda rho: log_loss_value_and_slope(Z, sample_weights, total_sample_weights, rho)
        compute_loss_from_scores = lambda scores: log_loss_value_from_scores(sample_weights, total_sample_weights, scores)

    elif final_loss_computation == 'normal':

        from riskslim.loss_functions.log_loss import \
            log_loss_value, \
            log_loss_value_and_slope, \
            log_loss_value_from_scores

        Z = np.require(Z, requirements=['C'])

        compute_loss = lambda rho: log_loss_value(Z, rho)
        compute_loss_cut = lambda rho: log_loss_value_and_slope(Z, rho)
        compute_loss_from_scores = lambda scores: log_loss_value_from_scores(scores)

    elif final_loss_computation == 'fast':

        from riskslim.loss_functions.fast_log_loss import \
            log_loss_value, \
            log_loss_value_and_slope, \
            log_loss_value_from_scores

        # the cython implementation expects Fortran-ordered arrays
        Z = np.require(Z, requirements=['F'])

        compute_loss = lambda rho: log_loss_value(Z, rho)
        compute_loss_cut = lambda rho: log_loss_value_and_slope(Z, rho)
        compute_loss_from_scores = lambda scores: log_loss_value_from_scores(scores)

    elif final_loss_computation == 'lookup':

        from riskslim.loss_functions.lookup_log_loss import \
            get_loss_value_and_prob_tables, \
            log_loss_value, \
            log_loss_value_and_slope, \
            log_loss_value_from_scores

        s_min, s_max = get_score_bounds(Z_min=np.min(Z, axis=0),
                                        Z_max=np.max(Z, axis=0),
                                        rho_lb=coef_set.lb,
                                        rho_ub=coef_set.ub,
                                        L0_reg_ind=np.array(coef_set.c0) == 0.0,
                                        L0_max=L0_max)

        # fixed: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # np.float64 is the equivalent concrete dtype
        Z = np.require(Z, requirements=['F'], dtype=np.float64)
        print_log("%d rows in lookup table" % (s_max - s_min + 1))

        loss_value_tbl, prob_value_tbl, tbl_offset = get_loss_value_and_prob_tables(s_min, s_max)

        compute_loss = lambda rho: log_loss_value(Z, rho, loss_value_tbl, tbl_offset)
        compute_loss_cut = lambda rho: log_loss_value_and_slope(Z, rho, loss_value_tbl, prob_value_tbl, tbl_offset)
        compute_loss_from_scores = lambda scores: log_loss_value_from_scores(scores, loss_value_tbl, tbl_offset)

    # "real" loss functions: exact fast-loss counterparts used to validate the
    # lookup-table approximation; identical to the chosen loss otherwise
    if final_loss_computation == 'lookup':

        from riskslim.loss_functions.fast_log_loss import \
            log_loss_value as loss_value_real, \
            log_loss_value_and_slope as loss_value_and_slope_real, \
            log_loss_value_from_scores as loss_value_from_scores_real

        compute_loss_real = lambda rho: loss_value_real(Z, rho)
        compute_loss_cut_real = lambda rho: loss_value_and_slope_real(Z, rho)
        compute_loss_from_scores_real = lambda scores: loss_value_from_scores_real(scores)

    else:

        compute_loss_real = compute_loss
        compute_loss_cut_real = compute_loss_cut
        compute_loss_from_scores_real = compute_loss_from_scores

    return (Z,
            compute_loss,
            compute_loss_cut,
            compute_loss_from_scores,
            compute_loss_real,
            compute_loss_cut_real,
            compute_loss_from_scores_real)
# NOTE(review): this chunk starts mid-script — n_rows, n_cols, pos_ind, w_pos,
# w_neg, rho_ub, rho_lb and the generate_* / get_score_bounds_from_range /
# lookup / normal / fast helpers are defined earlier in the file (not visible
# here) — confirm against the full file.

# per-row sample weights: w_pos for positive examples, w_neg otherwise
weights = np.empty(n_rows)
weights[pos_ind] = w_pos
weights[~pos_ind] = w_neg
weights = weights.flatten()

# generate data
Z = generate_binary_data(n_rows, n_cols)
rho = generate_integer_model(n_cols, rho_ub, rho_lb)
L0_reg_ind = np.ones(n_cols, dtype='bool')
L0_reg_ind[0] = False  # first coefficient (intercept) is not L0-regularized
Z_min = np.min(Z, axis=0)
Z_max = np.max(Z, axis=0)

# create lookup table covering every achievable integer score
min_score, max_score = get_score_bounds_from_range(Z_min, Z_max, rho_lb, rho_ub, L0_max=n_cols)
loss_value_tbl, prob_value_tbl, loss_tbl_offset = lookup.get_loss_value_and_prob_tables(min_score, max_score)
loss_tbl_offset = int(loss_tbl_offset)

# assert correctness of the log_loss-from-scores functions at every
# representable score: normal (pure python), fast (cython), table entry, and
# table-based lookup must all agree
for s in range(int(min_score), int(max_score) + 1):
    score = np.array(s, dtype=Z.dtype, ndmin=1)  # hoisted: built once per score
    normal_value = normal.log_loss_value_from_scores(score)
    cython_value = fast.log_loss_value_from_scores(score)
    table_value = loss_value_tbl[s + loss_tbl_offset]
    lookup_value = lookup.log_loss_value_from_scores(score, loss_value_tbl, loss_tbl_offset)
    assert np.isclose(normal_value, cython_value, rtol=1e-06)
    assert np.isclose(table_value, cython_value, rtol=1e-06)
    assert np.isclose(table_value, normal_value, rtol=1e-06)
    # the lookup function must return the table entry exactly, not approximately
    assert np.equal(table_value, lookup_value)

# fixed: `print "..."` is a Python 2 statement and a SyntaxError on Python 3
print("all tests passed")