def k_folds_linear_gd(df_test, df_train, Y):
    """Train a gradient-descent model fold by fold and report two scores.

    ``df_test`` is converted with ``gd.pandas_to_data`` and split into 10
    folds by ``partition_folds``. The first nine folds are visited in
    order; each one updates the same ``Model_w`` instance with the result
    of ``gd.gradient``. The tenth fold is held out and scored with the
    final weights.

    The threshold ``theta`` is selected once, on the first fold, through
    ``get_best_theta`` and reused for every later prediction.

    Args:
        df_test: Input handed to ``gd.pandas_to_data`` (presumably a pandas
            DataFrame -- confirm against the caller). All ten folds come
            from this argument.
        df_train: Unused; kept for interface compatibility.
        Y: Unused; kept for interface compatibility.

    Returns:
        ``[error, test_error]`` where ``error`` is the
        ``mystats.get_error`` value for the last training fold and
        ``test_error`` is the value for the held-out fold.
    """
    k = 10
    # Set once up front: the flag is needed both inside the loop and for
    # the held-out evaluation after it.
    binary = True

    k_folds = partition_folds(gd.pandas_to_data(df_test), k)
    model = Model_w()
    theta = None

    # Single-argument parenthesised prints produce the same output under
    # Python 2 (the file's dialect) and Python 3.
    for ki in range(k - 1):
        # Report the current fold index, not the constant fold count.
        print("k fold is {}".format(ki))
        data, truth = get_data_and_truth(k_folds[ki])
        model.update(gd.gradient(data, np.array(truth), 0.00001,
                                 max_iterations=5, binary=binary))
        print(model.w)
        if theta is None:
            # Only the threshold is needed here; its score is discarded.
            theta, _ = get_best_theta(data, truth, model.w, binary, False)
        predict = gd.predict_data(data, model.w, binary, False, theta)
        error = mystats.get_error(predict, truth, binary)
        print("Error for fold {} is {} with theta = {}".format(ki, error, theta))

    # Score the held-out last fold with the final weights and threshold.
    test, truth = get_data_and_truth(k_folds[k - 1])
    predict = gd.predict_data(test, model.w, binary, False, theta)
    test_error = mystats.get_error(predict, truth, binary)
    return [error, test_error]
def get_best_theta(data, truth, model, binary, logistic):
    """Scan candidate thresholds and return the best-scoring one.

    Candidates are ``min(model) + i / (max(model) - min(model))`` for
    ``i`` in ``0..99``. Each candidate is passed to ``gd.predict_data``
    and the predictions are scored with ``mystats.get_error``. The
    candidate with the largest score is kept; on ties the earliest
    candidate wins.

    When every entry of ``model`` is equal (for example a single-weight
    model) the step would divide by zero, so only the one candidate
    ``min(model)`` is evaluated.

    Args:
        data: Samples forwarded to ``gd.predict_data``.
        truth: Expected labels forwarded to ``mystats.get_error``.
        model: Non-empty sequence of weights; also forwarded to
            ``gd.predict_data``.
        binary: Flag forwarded to both helpers.
        logistic: Currently unused -- ``False`` is always passed to
            ``gd.predict_data``.

    Returns:
        Tuple ``(best_theta, max_acc)``: the winning threshold and its
        score.
    """
    low = min(model)
    span = max(model) - low

    if span == 0:
        # Degenerate weight range: the step is undefined, so fall back to
        # the only well-defined candidate instead of dividing by zero.
        candidates = [low]
    else:
        # NOTE(review): the step is 1 / span, so candidates run from
        # min(model) to min(model) + 99 / span rather than across
        # [min(model), max(model)]. Kept as-is; confirm whether
        # low + i * span / 100 was intended.
        candidates = [low + float(step) / span for step in range(100)]

    best_theta = None
    max_acc = 0
    for theta in candidates:
        # NOTE(review): the `logistic` argument is not forwarded here;
        # confirm whether the hard-coded False is deliberate.
        predict = gd.predict_data(data, model, binary, False, theta)
        # NOTE(review): the score comes from mystats.get_error but is
        # maximised -- presumably it is an accuracy in binary mode; verify.
        acc = mystats.get_error(predict, truth, binary)
        # The first candidate is always accepted; later ones must be
        # strictly better to replace it.
        if best_theta is None or acc > max_acc:
            best_theta = theta
            max_acc = acc
    return best_theta, max_acc