def train_lr():
    """Fit the "ranking" logistic-regression model, save it, and run prediction.

    The parameter dict starts from the local ``offline_model_dir`` setting and
    is then updated with ``params_common``, so entries in ``params_common``
    win on key collisions. The "train", "vali" and "test" splits are loaded
    in that order via ``load_data``.
    """
    settings = dict(offline_model_dir="../weights")
    settings.update(params_common)

    train_split = load_data("train")
    valid_split = load_data("vali")
    test_split = load_data("test")

    ranker = LogisticRegression("ranking", settings, logger)
    ranker.fit(train_split, validation_data=valid_split)
    ranker.save_session()
    # Second argument is presumably the output path for predictions -- confirm
    # against LogisticRegression.predict.
    ranker.predict(test_split, 'pred.txt')
def logistic_accuracy(model: "LogisticRegression", X: np.ndarray, targets: np.ndarray):
    """Return the fraction of samples whose rounded prediction equals its target.

    Args:
        model: Model whose ``predict(X)`` output is treated as probabilities.
        X: Input samples, passed unchanged to ``model.predict``.
        targets: True labels; flattened before comparison.

    Returns:
        The accuracy as a NumPy float in [0, 1].

    Raises:
        ValueError: If the number of predictions differs from the number of
            targets (previously this could broadcast silently).
        ZeroDivisionError: If ``targets`` is empty.
    """
    # Probabilities -> hard labels. np.around rounds half to even, so a
    # probability of exactly 0.5 becomes 0.
    predictions = np.around(model.predict(X)).reshape(-1)
    targets = np.asarray(targets).reshape(-1)

    if predictions.shape != targets.shape:
        raise ValueError(
            "predictions and targets differ in length: "
            "{} vs {}".format(predictions.shape[0], targets.shape[0])
        )
    if targets.size == 0:
        # Keep the exception type the plain division used to raise.
        raise ZeroDivisionError("cannot compute accuracy over zero samples")

    # Vectorized count of matches instead of builtin sum() over array items.
    return np.mean(predictions == targets)
def logistic_loss_function(model: "LogisticRegression", X: np.ndarray, targets: np.ndarray):
    """Return the average binary cross-entropy of the model's predictions.

    Args:
        model: Logistic regression model
        X: array of pca transformed images
        targets: actual labels (0 or 1) for the images the predictions are
            done on

    Returns:
        The summed cross entropy loss over all the predictions divided by
        ``targets.shape[0]``.
    """
    targets = np.asarray(targets)
    predictions = np.asarray(model.predict(X))
    if not np.issubdtype(predictions.dtype, np.floating):
        predictions = predictions.astype(float)

    # Same element count but different layout, e.g. (n, 1) vs (n,): align the
    # shapes so the products below stay element-wise instead of broadcasting
    # to an (n, n) matrix.
    if predictions.shape != targets.shape and predictions.size == targets.size:
        predictions = predictions.reshape(targets.shape)

    # Keep probabilities strictly inside (0, 1): log(0) is -inf and
    # 0 * -inf is NaN, which would otherwise poison the whole sum.
    eps = np.finfo(predictions.dtype).eps
    predictions = np.clip(predictions, eps, 1 - eps)

    # Error when label equal 1
    loss_1 = targets * np.log(predictions)
    # Error when label equal 0
    loss_0 = (1 - targets) * np.log(1 - predictions)

    # return the average loss overall
    return -(loss_1 + loss_0).sum() / targets.shape[0]
####################################
# Read and preprocess data from files
####################################
df_train = pd.read_csv(fp_train, skipinitialspace=True)
df_test = pd.read_csv(fp_test, skipinitialspace=True)

# Encode the income label as 0/1. The result is assigned back to the column
# rather than calling replace(..., inplace=True) on df_train['income']: that
# chained in-place form mutates a temporary under pandas copy-on-write and
# leaves df_train unchanged.
df_train['income'] = df_train['income'].replace({'<=50K': 0, '>50K': 1})

Xtrain, ytrain, Xtest = preprocess(df_train, df_test, features)

####################################
# Train the estimator and predict test data
####################################
# NOTE(review): if `estimator` is neither 'logistic' nor 'generative', `regr`
# stays an empty list and `ypred` is never assigned in this span -- confirm
# whether an explicit error is wanted here.
regr = []
if estimator == 'logistic':
    regr = LogisticRegression().fit(Xtrain, ytrain)
    # Round the model output to the nearest integer label.
    ypred = np.around(regr.predict(Xtest)).astype(int)
elif estimator == 'generative':
    regr = NaiveBayes().fit(Xtrain, ytrain)
    ypred = regr.predict(Xtest)

####################################
# Write the result to file
####################################
df_pred = pd.DataFrame()
df_pred['id'] = np.arange(1, len(ypred) + 1)  # ids start at 1
df_pred['label'] = ypred
df_pred.to_csv(fp_ans, index=False)
print('Done')

# Stack both classes into a single data set and label vector.
temp_X_train = np.concatenate((worm_images, noworm_images))
y_train = np.concatenate((worm_label, noworm_label))

print('Shuffling images and labels ...')
X_data, y_data = shuffling_files(temp_X_train, y_train)

print('spliting data .....')
# Features and labels are split by separate calls; this assumes data_split
# cuts at the same position for both -- TODO confirm.
X_train, X_test = data_split(X_data)
y_train, y_test = data_split(y_data)
print('Done')

# Scale by 1/255 (presumably 8-bit pixel values -- confirm).
X_train = X_train / 255
X_test = X_test / 255

model = LogisticRegression(lr=0.02, epochs=500, lamb=8)

# perf_counter() is monotonic and high-resolution, so the measured intervals
# are not affected by system clock adjustments the way time.time() is.
tic1 = time.perf_counter()
model.fit(X_train, y_train)
toc1 = time.perf_counter()

tic2 = time.perf_counter()
y_pred = model.predict(X_test)
toc2 = time.perf_counter()

# Take the index of the largest value in each row as the predicted class.
y_pred = np.argmax(y_pred, axis=1)

print('Training Time: {}'.format(toc1 - tic1))
print('Testing Time: {}'.format(toc2 - tic2))
print('acc_test: {}'.format(accuracy_score(y_test, y_pred)))

# No figure is created in this span; shows whatever was plotted earlier.
plt.show()