Example #1
def submission(x_test, w, i):
    x_test = remove_columns(x_test)
    x_test = replace_outliers_with_mean(x_test)
    x_test = standardize(x_test)
    #x_test = build_poly(x_test,3)
    x_test = addones(x_test)

    y_predictions = predict_labels(w, x_test)
    y_predictions = predict_reverse(y_predictions)
    y_predictions = y_predictions.reshape(-1)  # reshape returns a new array; assign it back
    create_csv_submission(i, y_predictions, 'data/sample-submission.csv')
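For reference: every example on this page funnels into the same course helper. A minimal sketch of what create_csv_submission typically looks like in the EPFL CS-433 proj1_helpers.py (an assumption; none of the excerpts here show it): write Id,Prediction rows with csv.DictWriter.

import csv

def create_csv_submission(ids, y_pred, name):
    """Write (id, prediction) pairs as a Kaggle/AIcrowd-style CSV file."""
    with open(name, 'w', newline='') as csvfile:
        fieldnames = ['Id', 'Prediction']
        writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=fieldnames)
        writer.writeheader()
        for r1, r2 in zip(ids, y_pred):
            writer.writerow({'Id': int(r1), 'Prediction': int(r2)})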
Example #2
    def create_submission(self, inputs, name):
        """Create the submission csv file.
        Args:
            inputs: Test data to run the predictions on
            name: The name of the submission file
        """
        pred = self.predict(inputs)

        if self.use_logistic:
            pred[np.where(pred == 0)] = -1

        create_csv_submission(list(range(350000, 350000 + len(pred))), pred,
                              name)
        return name
Example #3
    def predict_test(self, x=None, ids=None):
        if x is None or ids is None:
            if not self._orig_test:
                _, _tX_test, self._ids_test = load_csv_data(
                    self._DATA_TEST_PATH)
                _, self._tX_test = self.prepare_all_data(None, _tX_test)
                self._tX_orig = self._tX_test.copy()
                _, self._tX_test = self._prepare_model_data(
                    None, self._tX_test)
                self._orig_test = True
        else:
            _, self._tX_test = self.prepare_all_data(None, x.copy())
            self._tX_orig = self._tX_test.copy()
            self._ids_test = ids.copy()
            _, self._tX_test = self._prepare_model_data(None, self._tX_test)
            self._orig_test = False

        y_test_pred = self._predict(self._tX_test)
        create_csv_submission(self._ids_test, y_test_pred, self._output_path)
Example #4
def main():
    """
    The main function that initializes the final training and prediction of the proposed models.
    """
    # Load train and test datasets
    data_obj = DataLoader()

    # Train model for each jet and get predictions
    print("Jet 0")
    ids_test_sub_0, y_pred_0 = best_model_predictions(data_obj=data_obj,
                                                      jet=0,
                                                      degrees=6)
    print("Jet 1")
    ids_test_sub_1, y_pred_1 = best_model_predictions(data_obj=data_obj,
                                                      jet=1,
                                                      degrees=10)
    print("Jet 2")
    ids_test_sub_2, y_pred_2 = best_model_predictions(data_obj=data_obj,
                                                      jet=2,
                                                      degrees=4)
    print("Jet 3")
    ids_test_sub_3, y_pred_3 = best_model_predictions(data_obj=data_obj,
                                                      jet=3,
                                                      degrees=6)

    # Concatenate all the predictions with their label
    ids_all = np.concatenate(
        (ids_test_sub_0, ids_test_sub_1, ids_test_sub_2, ids_test_sub_3),
        axis=0)
    preds_all = np.concatenate((y_pred_0, y_pred_1, y_pred_2, y_pred_3),
                               axis=0)

    # Change 0 label to -1
    preds_all = np.where(preds_all == 0, -1, preds_all)
    OUTPUT_PATH = './../results/predictions/best_model_predictions.csv'

    # Create submission
    create_csv_submission(ids_all, preds_all, OUTPUT_PATH)
    print("Predictions have been created.")
Example #5
def generate_submission(ids_te, Y_te):
    """
    Generate submission in submissions path.

    Args:
        ids_te (ndarray): array with IDs of samples
        Y_te (ndarray): array with class labels of samples

    Returns:
        None
    """

    # generate submission
    print("[!] Generating Submission...")
    date_time = START_TIME
    # TODO replace whitespaces in function names
    csv_name = f"HB_SUBMISSION_{date_time}.csv"

    Path(SUBMISSION_PATH).mkdir(parents=True, exist_ok=True)  # also create missing parent directories

    create_csv_submission(ids_te, Y_te, csv_name, SUBMISSION_PATH)
    print(f"[+] Submission {csv_name} was generated!")
Example #6
def combine_and_create_submission(predictions, ids_predicted, submission_name):
    ids_gathered = []
    predictions_gathered = []
    current_id = min(ids[0] for ids in ids_predicted)  # smallest leading id across all jets
    length = sum(len(prediction) for prediction in predictions)  # builtin sum; np.sum is not meant for generators
    print('\nGathering ids and predictions for each jet number together...')
    for _ in range(length):
        for jet_num in range(4):
            if len(ids_predicted[jet_num]) > 0:
                if ids_predicted[jet_num][0] == current_id:
                    predictions_gathered.append(predictions[jet_num][0])
                    ids_gathered.append(current_id)
                    predictions[jet_num] = np.delete(predictions[jet_num], 0)
                    ids_predicted[jet_num] = np.delete(ids_predicted[jet_num],
                                                       0)
                    break
        current_id += 1
    print('\n... ids and predictions for each jet number were gathered.')
    print('\nCreating submission file with name', submission_name, '...')
    create_csv_submission(np.array(ids_gathered),
                          np.array(predictions_gathered), submission_name)
    print('\n...', submission_name, 'is created. Ready to submit :)!')
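The gather loop above deletes element 0 of a NumPy array once per sample, which is quadratic overall. A sort-based merge (a sketch with an illustrative name, not the author's code) produces the same id-ordered result in O(n log n):

import numpy as np

def combine_predictions(predictions, ids_predicted):
    """Concatenate per-jet predictions and reorder them by sample id."""
    ids_all = np.concatenate(ids_predicted)
    preds_all = np.concatenate(predictions)
    order = np.argsort(ids_all)
    return ids_all[order], preds_all[order]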
Example #7
def create_prediction():
    """Create predictions for kaggle."""

    y, X, dict_mask_jets_train, ids = helpers_us.process_data('Data/train.csv', inv_log=True)
    best_param = [[2, 0.0072], [2, 0.1389], [2, 0.1389]]  # found with the function best_model_logistic
    best_w = []

    for i in range(len(dict_mask_jets_train)):
        xi = X[i]
        yi = y[dict_mask_jets_train[i]]
        _, _, w = cross_reg_logistic_regression(yi, xi, degree=best_param[i][0], k_fold=6,
                                                lambda_=best_param[i][1], max_iters=500, gamma=-1, batch=35)
        best_w.append(w)

    y, X, dict_mask_jets_train, ids = helpers_us.process_data('Data/test.csv', inv_log=True)
    y_pred = np.zeros(y.shape[0])
    for i in range(len(dict_mask_jets_train)):
        xi = X[i]
        xi = modselection.build_poly(xi, 2)
        y_test_pred = modselection.predict_labels_logistic(best_w[i], xi)
        y_pred[dict_mask_jets_train[i]] = y_test_pred

    helpers.create_csv_submission(ids, y_pred, "true_prediction.csv")
Example #8
                                corr=1,
                                dimension_expansion=5,
                                bool_col=True)

x_te, _, _ = concatenate_log(x_te.copy(), mean_log=mean_log, std_log=std_log)
print("Test data cleaned.")

# 7. Build the polynomials
tx_te = []
for jet in range(4):
    tx_te.append(build_poly(x_te[jet], degree))
print("The test polynomials have been built.")

# 8. Predict and concatenate the predictions
y_te_pred = []
for jet in range(4):
    y_te_pred.append(predict_labels(weigths[jet], tx_te[jet]))

for jet in range(4):
    ids_te[jet] = ids_te[jet].reshape((-1, 1))
y_pred = np.vstack([y_te_pred[0], y_te_pred[1], y_te_pred[2], y_te_pred[3]])
ids = np.vstack([ids_te[0], ids_te[1], ids_te[2], ids_te[3]])

print("I predicted ", str((y_pred == -1).sum()), "-1s and ",
      str((y_pred == 1).sum()), "1s")

# 9. Store the predictions
sub_file_name = "predictions"
create_csv_submission(ids, y_pred, sub_file_name)
print("Prediction stored in file '" + sub_file_name + "'")
Example #9
            # Use ridge_regression to compute our model
            weights, pred_score = k_fold_cross_validation(y_train, processed_tx_train, k, imp.ridge_regression, [lambda_])

            print("Got predictions score = " + str(pred_score) + "\n")

            if pred_score > best_pred_score:
                # Update best results
                best_weights = np.copy(weights)
                best_pred_score = pred_score

                # Update best parameters
                best_degree = degree
                best_lambda = lambda_
                best_k = k

print("Best score on training data is " + str(best_pred_score))
print("Best parameters are (degree, lambda, k) = (" + str(best_degree) + ", " + str(best_lambda) + ", " + str(best_k) + ")")

# Create the predictions
processed_tx_test = preprocess.build_poly(tx_test, best_degree)
y_pred = helper.predict_labels(best_weights, processed_tx_test)

# Save the predictions
program_path = os.path.dirname(os.path.realpath(__file__))
filename = program_path + '/results/run_ridge.csv'
helper.create_csv_submission(ids, y_pred, filename)

# Best score on training data is 0.817712
# Best parameters are (degree, lambda, k) = (12, 0.0001, 5)
Example #10
X_train_pri_0 = np.load('X_pri_0.npy')
X_train_pri_1 = np.load('X_pri_1.npy')
X_train_pri_23 = np.load('X_pri_23.npy')

y_train_pri_0 = np.load('y_pri_0.npy')
y_train_pri_1 = np.load('y_pri_1.npy')
y_train_pri_23 = np.load('y_pri_23.npy')

if USE_PRETRAINED_WEIGHTS:
    w0 = np.load('w0.npy')
    w1 = np.load('w1.npy')
    w23 = np.load('w23.npy')
else:  # Model trained here
    w0, loss0 = ridge_regression(y_train_pri_0, build_poly(X_train_pri_0, 12),
                                 1e-14)
    w1, loss1 = ridge_regression(y_train_pri_1, build_poly(X_train_pri_1, 12),
                                 1e-3)
    w23, loss23 = ridge_regression(y_train_pri_23,
                                   build_poly(X_train_pri_23, 11), 1e-5)

pri_0_y = predict_labels(w0, build_poly(X_pri_0, 12))
pri_1_y = predict_labels(w1, build_poly(X_pri_1, 12))
pri_23_y = predict_labels(w23, build_poly(X_pri_23, 11))

predictions[ids_pri_0] = pri_0_y
predictions[ids_pri_1] = pri_1_y
predictions[ids_pri_23] = pri_23_y

create_csv_submission(ids, predictions, 'output.csv')
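The USE_PRETRAINED_WEIGHTS branch only works if a previous run serialized the weights; the matching save step, assumed and not shown in the excerpt, would be:

# after training, cache the weights for later runs with USE_PRETRAINED_WEIGHTS
np.save('w0.npy', w0)
np.save('w1.npy', w1)
np.save('w23.npy', w23)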
Example #11
"""
Load the datasets, train a model, and create a Kaggle submission for the first 
Machine Learning project

Authors: Kirill IVANOV, Matthias RAMIREZ, Nicolas TALABOT
"""

### Import modules and datasets
from proj1_helpers import load_csv_data, predict_labels, create_csv_submission
from implementations import least_squares
from utilities import split_data, preprocess_data

y_train, x_train, ids_train = load_csv_data("train.csv")
y_test, x_test, ids_test = load_csv_data("test.csv")

# Parameters
seed = 3
degree = 11
ratio = 0.66

# Learn the model
tx, x_mean, x_std = preprocess_data(x_train, degree)
x_tr, y_tr, x_te, y_te = split_data(tx, y_train, ratio, seed)
w, loss_tr = least_squares(y_tr, x_tr)

# Create a Kaggle submission
x_kaggle, _, _ = preprocess_data(x_test, degree, compute_mean_std=False,
                                 x_mean=x_mean, x_std=x_std)
y_pred = predict_labels(w, x_kaggle)
create_csv_submission(ids_test, y_pred, "run_submission.csv")
Example #12
    tX_improved = f_e.feature_engineer(tX_preprocessed)

    # In case we want to test our model locally by splitting our data
    if params.LOCAL_PREDICTION:
        pred.locally_predict(tX_improved, y_preprocessed, counts)
    else:
        print('Test set:')
        y_test, tX_test, ids_test = helpers.load_csv_data(
            params.DATA_TEST_PATH)
        y_test_preprocessed, tX_test_preprocessed, ids_test_preprocessed, masks_test, counts_test = prep.preprocess(
            y_test, tX_test, ids_test)
        tX_test_improved = f_e.feature_engineer(tX_test_preprocessed)
        log_initial_ws = []
        for i in range(len(tX_test_improved)):
            log_initial_ws.append(np.repeat(0, tX_test_improved[i].shape[1]))
        optimal_ws = pred.find_optimal_ws_grouped(
            tX_improved, y_preprocessed, params.IMPLEMENTATION, log_initial_ws,
            params.MAX_ITERS, params.GAMMA, params.DECREASING_GAMMA,
            params.LOG_LAMBDA, params.RIDGE_LAMBDA)
        y_preds = []
        for i in range(len(optimal_ws)):
            y_preds.append(
                helpers.predict_labels(optimal_ws[i], tX_test_improved[i],
                                       params.IMPLEMENTATION)[1])
        flat_y_preds = helpers.flatten_list(y_preds)
        flat_ids = helpers.flatten_list(ids_test_preprocessed)
        ids_indices = np.argsort(flat_ids)
        y_preds_sorted = np.array(flat_y_preds)[ids_indices]
        helpers.create_csv_submission(ids_test, y_preds_sorted,
                                      params.OUTPUT_PATH)
Example #13
from data_processing import process_data, build_poly

print("Loading data\n")

# Loading data from csv files
y_tr, tx_tr, ids_tr = load_csv_data("data/train.csv")
y_te, tx_te, ids_te = load_csv_data("data/test.csv")

# Hyper-parameters definitions
degree = 7
lambda_ = 0.00025

# Preprocessing data: cleaning, standardizing, and adding a constant column
tx_tr, tx_te = process_data(tx_tr, tx_te, y_tr, y_te)

# Feature augmentation through polynomials
tx_tr = build_poly(tx_tr, degree)
tx_te = build_poly(tx_te, degree)

# Training with ridge regression
print("Training the model\n")
weights, _ = ridge_regression(y_tr, tx_tr, lambda_)

# Computing prediction vector
y_pred = predict_labels(weights, tx_te)

# Creating file for submission
create_csv_submission(ids_te, y_pred, "prediction.csv")

print("Done")
Example #14
File: run.py Project: reslbesl/CS433_ML
    # Initialise training
    w_initial = np.ones(tx_train.shape[1])

    # Run gradient descent
    w, loss = logistic_regression_mean(y_train,
                                       tx_train,
                                       w_initial,
                                       MAX_ITERS,
                                       GAMMA,
                                       verbose=True)
    print(f'Training loss: {loss}')

    acc = eval_model(y_train, tx_train, w, thresh=0.5)
    print(f'Training accuracy: {acc}')

    # Load test data
    y_test, x_test, ids_test = load_csv_data(path.join(DATA_PATH, 'test.csv'))
    fx_test = feature_transform(x_test)

    # Standardise to mean and s.d. of training data
    fx_test = standardise_to_fixed(fx_test, mu_train, sigma_train)

    # Add offset term
    tx_test = np.c_[np.ones(fx_test.shape[0]), fx_test]

    # Get predictions on test set
    y_pred = predict_labels(w, tx_test, thresh=0.5)
    create_csv_submission(ids_test, y_pred,
                          path.join(DATA_PATH, 'final_submission.csv'))
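standardise_to_fixed is not shown in this excerpt; one plausible implementation, assuming mu_train and sigma_train are per-feature statistics of the training set:

def standardise_to_fixed(x, mu, sigma):
    """Standardise features with fixed (training-set) mean and standard deviation."""
    return (x - mu) / sigma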
Example #15
def create_submission(name, tx_test):
    """Creates the submission file using the given test data and filename."""
    predictions = run_on_test_data(tx_test)
    predictions[predictions == 0] = -1
    create_csv_submission(list(range(350000, 350000 + len(predictions))),
                          predictions, name)
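Examples #2, #4 and #15 all remap logistic 0/1 outputs to the -1/+1 labels the submission format expects. The non-mutating form used in example #4 is the safest one-liner:

predictions = np.where(predictions == 0, -1, predictions)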
Example #16
    ws = []

    for k in range(k_fold):

        loss_tr, loss_te, w = cross_validation(y, x, k_indices, k, lambda_,
                                               degree)
        losses_te.append(loss_te)
        losses_tr.append(loss_tr)
        ws.append(w)

    return np.mean(losses_te, axis=0), np.mean(losses_tr, axis=0), np.mean(ws, axis=0)


losses_te, losses_tr, w = cross_validation_ridge()

print(f'Average misclassification proportion on test folds was {losses_te}. '
      f'On train folds it was {losses_tr}.')

test_y, test_x, test_ids = load_csv_data(DATA_PATH + 'test.csv')

# clean and scale the test data using statistics (column means, min, max) computed on the training data
test_x, _, _, _ = normalize(test_x, col_mean, xmin, xmax)

# create final predictions on testing data and submission csv
y_pred = predict_labels(w, build_poly(test_x, degree))
create_csv_submission(test_ids, y_pred, DATA_PATH + 'inferred.csv')
print(
    'Your final submission has been created and is called /data/inferred.csv')
Example #17
for i_22 in range(4):
    full_std, _, _ = standardize(only_good_data_full[i_22])
    test_std, _, _ = standardize(only_good_data_test[i_22])

    phi_full = build_poly(full_std, bests[i_22][0])
    phi_test = build_poly(test_std, bests[i_22][0])

    w, _ = ridge_regression(yb_full_by_22[i_22], phi_full, bests[i_22][1])

    # Get predictions by nearest value
    yb_test_by_22[i_22] = predict(w, phi_test)

    preds[i_22] = prediction(w, phi_full, yb_full_by_22[i_22])
    print('Ratio of good predictions for jet', i_22, ':', preds[i_22])

# Weighted average for predictions
overall = (preds[0] * yb_full_by_22[0].shape[0] + preds[1] *
           yb_full_by_22[1].shape[0] + preds[2] * yb_full_by_22[2].shape[0] +
           preds[3] * yb_full_by_22[3].shape[0]) / (
               yb_full_by_22[0].shape[0] + yb_full_by_22[1].shape[0] +
               yb_full_by_22[2].shape[0] + yb_full_by_22[3].shape[0])

print('Overall prediction', overall)

print('Creating submission')
yb_submit = np.concatenate(yb_test_by_22)
ids_submit = np.concatenate(ids_test_by_22)

create_csv_submission(ids_submit, yb_submit, 'submission_by_cat.csv')
print('Done')
Example #18
## Prediction
############################
print('prediction started')

# load the test set
print('loading the testing dataset...')
y_test, tx_test, ids_test = load_csv_data(dat_dir + "test.csv")
print('data loaded...')

# combine all the selected features for testing set
# Note we used the same means and stds from training set
test_log, _, _ = compute_log(tx_test, index_log, mean_log, std_log)
test_theta, _, _ = compute_theta(tx_test, index_theta, mean_theta, std_theta)
test_physics, _, _ = compute_physics(tx_test, index_physics_A, index_physics_B,
                                     index_physics_C, mean_physics,
                                     std_physics)
test_new = np.c_[test_log, test_theta, test_physics]

# reconstruct all the features of test set using the best degrees from training set
test_best_degree = build_poly_by_feature(test_new, best_degrees)
# Squash the expanded features into (0, 1) with the logistic sigmoid
X_test = sigmoid(test_best_degree)

# predict
y_pred = predict_regression_labels(best_weights, X_test, threshold=0)

print('prediction ended')

# generate submission
create_csv_submission(ids_test, y_pred, 'submission.csv')
print('submission generated')
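The project's sigmoid is not shown. A numerically stable sketch that avoids overflow in np.exp for large |t|:

import numpy as np

def sigmoid(t):
    """Logistic function 1 / (1 + exp(-t)), guarded against overflow."""
    pos = 1.0 / (1.0 + np.exp(-np.clip(t, 0, None)))
    neg = np.exp(np.clip(t, None, 0)) / (1.0 + np.exp(np.clip(t, None, 0)))
    return np.where(t >= 0, pos, neg)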
Example #19
            else:
                dict_lambda_weight[lambda_] = [loss_te, weight]
            rmse_tr_tmp.append(loss_tr)
        rmse_tr.append(np.mean(rmse_tr_tmp))
        rmse_te.append(np.mean(rmse_te_tmp))
        print("lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".
              format(l=lambda_, tr=rmse_tr[ind], te=rmse_te[ind]))
    ind_lambda_opt = np.argmin(rmse_te)
    best_lambda = lambdas[ind_lambda_opt]
    best_rmse = rmse_te[ind_lambda_opt]
    best_weight = dict_lambda_weight[best_lambda][1]
    return best_weight, best_rmse, best_lambda


print("training")
optimal_weight, best_rmse, best_lambda = cross_validation_demo()

x_train2 = build_poly(x_train, degree)
y_pred = predict_labels(optimal_weight, x_train2)

output = accuracy(y_pred, y_train)

print("done, training accuracy:")
print(output)

x_test2 = build_poly(x_test, degree)
y_pred2 = predict_labels(optimal_weight, x_test2)

print("creating submission")
create_csv_submission(ids_test, y_pred2, 'ridge_regression_final.csv')
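The accuracy helper used above is not shown; a minimal sketch consistent with its call site accuracy(y_pred, y_train):

import numpy as np

def accuracy(y_pred, y_true):
    """Fraction of predicted labels that match the true labels."""
    return np.mean(y_pred == y_true)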
Example #20
run.py is used to launch the application of weights on a test dataset and serialize the results.
"""


def load_npy(*npy_paths):
    """
    Returns numpy arrays serialized at npy_paths.
    Args:
        npy_paths : a sequence of serialized np.arrays files paths.
    Returns:
        Deserialized numpy arrays
    """
    return (np.load(p) for p in npy_paths)


# Load the test dataset
_, test_data, test_ids, _ = load_csv_data('all/test.csv')

# Load the weights, feature masks and parameters (mean, std_dev)
weights, clean_features, parameters = load_npy('all/weights.npy',
                                               'all/clean_features.npy',
                                               'all/parameters.npy')

# Apply the learned weights to the test dataset
pri_jet_num_idx = 22
polynomial_degree = 3
predictions = model_predictions(test_data, weights, pri_jet_num_idx,
                                clean_features, parameters, polynomial_degree)

create_csv_submission(test_ids, predictions, 'all/predictions.csv')
Example #21
g, l, avg_test_accuracy_RLR = cross_validation_RLR(X_train, y_train, k_fold=4, seed=1)

#%% Testing functions
#np.random.seed(42)

gamma = 0.2
lambda_ = 4E-5

w, loss = least_squares(y = y_train, tx = X_train)
#
w, loss = least_squares_SGD(y = y_train, tx = X_train, initial_w = np.random.random(size=num_features)*0.01, max_iters = 200000, gamma = gamma)
#
w, loss = ridge_regression(y = y_train, tx = X_train, lambda_ = lambda_)
#
w, loss = logistic_regression(y = y_train, tx = X_train, initial_w = np.random.random(size=num_features)*10, max_iters = 125000, gamma = gamma)
#
# note: each fit above overwrites w; only this last model feeds the prediction below
w, loss = reg_logistic_regression(y = y_train, tx = X_train, lambda_ = lambda_, initial_w = np.random.random(size=num_features)*0.01, max_iters = 200000, gamma = gamma)

plt.plot(w)

#%% Predictive step
y_test = X_test @ w

plt.hist(y_test, bins=200)

y_pred = predict_labels(w, X_test)

#%% Create submission

create_csv_submission(test.Id, y_pred, 'submission.csv')
Example #22
File: run.py Project: Battleman/Space-ML
    print("Computing optimal weights")
    w, _ = ridge_regression(y_correspond, x_train_aug, LAMBDAS[i])
    del x_train_aug

    # features engineering test set
    print("Augmenting testing set")
    x_test_aug_fname = "cache/x_test_augmented_jet{}_{}dim.np".format(
        i, COMBINED_DEGREES[i])

    try:
        with open(x_test_aug_fname, "rb") as f:
            x_test_aug = np.load(f)
    except FileNotFoundError:
        # not existing, recomputing
        x_test_aug = augment(XS_TEST[i], COMBINED_DEGREES[i],
                             SIMPLE_DEGREES[i], TAN_HYP_DEGREES[i],
                             INVERSE_LOG_DEGREES[i], ROOT_DEGREES[i])
        if CACHE:
            with open(x_test_aug_fname, "wb") as f:
                np.save(f, x_test_aug)

    # compute predictions and store
    print("Predicting labels for subset")
    y_submission[MASKS_TEST[i]] = predict_labels(w, x_test_aug)
    del x_test_aug
    del w

# all predictions completed, create CSV
print("Creating submission")
create_csv_submission(ids_test, y_submission, OUTPUT_PATH)
Example #23
N = y_train.size
# TRAIN test accuracy for sanity
n_err = len(
    np.where(
        y_train != predict_01_labels(w, tX_train, 0.5).reshape(y_train.shape))
    [0])

print("train accuracy :", 1 - n_err / N)

##########################################################################
#### Generate a prediction on the test set
##########################################################################

# load data from test set
_, _tX_test, ids_test = load_csv_data("test.csv")

# Do the corresponding steps for tX_test (we do not need to remove outliers or oversample, even if we did it on the training set)
tX_test = replace_data(_tX_test)
one_hot_columns = one_hot_encode(tX_test, 22)
tX_test = normalize_data(tX_test)
tX_test = polynomial_expansion(np.delete(tX_test, 22, axis=1), polynomial_deg)
tX_test = np.c_[np.delete(tX_test, 22, axis=1), one_hot_columns]

# Predict the labels with a 0 threshold
y_pred = predict_labels(w, tX_test, 0.0)

name = "my_submission.csv"

# Save the predictions
create_csv_submission(ids_test, y_pred, name)
Example #24
    3.57813748e-01, 3.09738153e-03, -3.00165489e-02, -1.96892274e-02,
    -6.63026620e-03, -6.16628770e-03, 3.24687388e-02, -3.66001378e-03,
    1.59398191e-02, 4.65051845e-03, 1.01299540e-02, -3.51035034e-02,
    1.52764982e-02, 5.76623633e-03, 6.11417966e-03, 3.15704313e-02,
    -6.83057630e-03, -4.28346753e-03, -1.17045931e-02, -1.18246783e-01,
    -1.67412873e-03, 4.92261691e-03, -4.41274760e-03, -3.21693847e-02,
    -2.74392089e-02, 3.35931046e-02, -1.09660753e-01, 2.52705139e-01,
    2.77765221e-03, 1.91389879e-03, 3.25497546e-02, 2.85366822e-02,
    -1.02954086e-05, 8.91734653e-04, -3.61480432e-03, -1.42486539e-02,
    3.27415717e-02, 2.83215314e-02, -4.36361344e-03, 2.04638731e-03,
    6.34341119e-02, 4.53017769e-04, 4.98469992e-02, -6.59353018e-02,
    -5.19988600e-02, -3.28215812e-02, 1.50462194e-04, -5.62069645e-04,
    -7.85663783e-04, -2.79324395e-02, -5.99052349e-04, 4.93552796e-04,
    1.69177167e-02, 3.83051056e-04, 7.79895566e-02
])

# Training accuracy
(correct_count, total_count) = helpers.prediction_accuracy(y, X, w_star)
correct_ratio = correct_count / total_count

################################################################################
#                            analysis of results                               #
################################################################################
print("classification precision: {cp}".format(cp=correct_ratio))

################################################################################
#                            store data for submission                         #
################################################################################
y_pred = proj1_helpers.predict_labels(w_star, XTest)
proj1_helpers.create_csv_submission(idTest, y_pred, DATA_PREDICTIONS_PATH)
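helpers.prediction_accuracy is likewise not shown; a hedged sketch consistent with the (correct_count, total_count) pair unpacked above:

import numpy as np

def prediction_accuracy(y, x, w):
    """Count how many sign predictions of x @ w agree with the labels y."""
    y_pred = np.sign(x @ w)
    return int((y_pred == y).sum()), len(y)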