# Cross-validation: the entries of `a` are cut into `num_bins` consecutive
# folds; every fold serves once as the test set while the remaining folds
# together form the training set.
num_bins = 8
bin_size = int(np.ceil(len(a) / num_bins))
bins = [a[k * bin_size:(k + 1) * bin_size] for k in range(num_bins)]

X = D[:, cols]
y = D[:, c.isalert]

# One slot per fold for each value returned by fit_logistic_regression.
auc, fpr, tpr, w, b = ([0] * num_bins for _ in range(5))

for i, test in enumerate(bins):
    # Training rows are the entries of every fold except fold i.
    train = [row for k, fold in enumerate(bins) if k != i for row in fold]
    # NOTE(review): `y[train, :]` requires `y` to be two-dimensional;
    # confirm the shape produced by `D[:, c.isalert]`.
    fold_result = fit_logistic_regression(
        X[train, :], y[train, :], X[test, :], y[test, :], C=C)
    auc[i], fpr[i], tpr[i], w[i], b[i] = fold_result


def save_result(save_path):
    """Write the dictionary built by get_result() to `save_path` as JSON.

    The file is opened for writing (truncating existing content) and the
    JSON is indented by four spaces.
    """
    with open(save_path, 'w') as out:
        json.dump(get_result(), out, indent=4)


def get_result():
    """Collect the cross-validation results in a dictionary.

    Returns a dict with the keys 'weights', 'auc', 'intercepts', 'C',
    'num_bins', 'dataset_size' and 'generator'. 'weights' holds, for every
    entry of `w`, its first element converted with `tolist()`;
    'dataset_size' is the number of rows of `D` and 'generator' is the
    path of this script.
    """
    weights = [coef[0].tolist() for coef in w]
    return {
        'weights': weights,
        'auc': auc,
        'intercepts': b,
        'C': C,
        'num_bins': num_bins,
        'dataset_size': D.shape[0],
        'generator': __file__,
    }
# Fixed split: the first `num_train` entries of `a` index the training rows,
# all remaining entries index the test rows.
num_train = 320000
train_rows = a[:num_train]
test_rows = a[num_train:]

# Select the feature columns once and reuse the result for both splits
# instead of materialising `D[:, cols]` twice.
feature_columns = D[:, cols]
X = feature_columns[train_rows, :]
Xt = feature_columns[test_rows, :]
del feature_columns  # only needed to build X and Xt

# `^ 1` flips the lowest bit of every integer label, i.e. swaps 0 and 1
# for binary labels.
label_column = c('isalert')
y = D[train_rows, label_column].astype(int) ^ 1
yt = D[test_rows, label_column].astype(int) ^ 1

# One slot per run for each value returned by fit_logistic_regression.
num_tests = 1
auc = num_tests*[0]
fpr = num_tests*[0]
tpr = num_tests*[0]
w = num_tests*[0]
b = num_tests*[0]
for i in range(num_tests):
    # Run i uses C = 10**(i + 3), i.e. 1000 for the first run.
    auc[i], fpr[i], tpr[i], w[i], b[i] = fit_logistic_regression(
        X, y, Xt, yt, C=10**(i+3))


def save_result(append=''):
    """Write weights, AUC values and intercepts to the session's JSON file.

    The output file is
    '<path>/session/17-recreating-winning-entry/data/regression-result.json',
    where `path` is taken from module scope; existing content is
    overwritten.

    append: currently unused and kept only so existing callers keep
        working. NOTE(review): presumably meant as a file-name suffix;
        confirm before wiring it in.
    """
    # The first element of every entry of `w` is converted with tolist()
    # so that it can be serialised by json.
    weights = [coef[0].tolist() for coef in w]
    save_data = {'weights': weights, 'auc': auc, 'intercepts': b}
    save_path = ('{0}/session/17-recreating-winning-entry'
                 '/data/regression-result.json').format(path)
    with open(save_path, 'w') as f:
        json.dump(save_data, f)
Xt = Xt[ts_rows, :]
yt = D[ts_rows, c('isalert')]

# Greedy forward feature selection: every round fits one model per remaining
# candidate column (added to the columns chosen so far), keeps the candidate
# with the highest score and stops as soon as a round brings no improvement.
num_features = 90
# auc[i, k] is the score of round i with column k added to the chosen
# columns; entries that are never evaluated stay 0.
auc = np.zeros((max_features, num_features))

# Remove P3, P6, P8, V7 and V9 and
# the corresponding running features.
# See session 9 on data exploration
# for details
cc = LabelIndex(L_ex[4:])
exclude = cc('p3', 'p6', 'p8', 'v7', 'v9',
             'mp3', 'mp6', 'mp8', 'mv7', 'mv9',
             'sdp3', 'sdp6', 'sdp8', 'sdv7', 'sdv9')
candidates = [k for k in range(num_features) if k not in exclude]

chosen = []
# Score a round has to beat. It starts at 0, like the untouched entries of
# `auc`, so the first round is compared against an explicit baseline rather
# than against `auc[-1, :]` (which is row 0 itself when max_features == 1
# and made the loop stop before choosing anything).
best_so_far = 0.0
for i in range(max_features):
    if not candidates:
        # Every usable column has already been chosen.
        break
    # The loop variable is deliberately not called `c`: that name is the
    # label lookup used above and must not be overwritten.
    for candidate in candidates:
        features = chosen + [candidate]
        result = fit_logistic_regression(X[:, features], y,
                                         Xt[:, features], yt)
        # The first element of the result is used as the score.
        auc[i, candidate] = result[0]
    # Only columns evaluated in this round may win; on ties the first
    # (lowest-numbered) candidate is taken, as argmax returns the first
    # maximum.
    chosen_feature = candidates[int(auc[i, candidates].argmax())]
    if auc[i, chosen_feature] <= best_so_far:
        break
    best_so_far = auc[i, chosen_feature]
    candidates.remove(chosen_feature)
    chosen.append(chosen_feature)