def gradient_Descent(X, y, theta, alpha, epoch, l):
    """Run regularized batch gradient descent for logistic regression.

    :param X: design matrix, one sample per row
    :param y: target labels (row vector matching theta's orientation)
    :param theta: initial weight row-vector
    :param alpha: learning rate
    :param epoch: number of full-batch update iterations
    :param l: regularization strength (applied to every weight, bias included)
    :return: (theta, costs) — final weights and the regularized cost after
             each iteration
    """
    costs = []
    m = len(X)  # number of training samples, invariant across iterations
    for _ in range(epoch):
        error = sigmoid(X @ theta.T) - y
        # Gradient of the regularized cost: data term plus L2 penalty term.
        grad = 1 / m * (error.T @ X) + l / m * theta
        theta = theta - alpha * grad
        costs.append(cost_reg(X, y, theta, l))
    return theta, costs
def predict_tweet(tweet, freqs, theta):
    """Score a tweet with a trained logistic-regression model.

    :param tweet: a string
    :param freqs: dictionary mapping each (word, label) tuple to its frequency
    :param theta: (3,1) weight vector
    :return: probability that the tweet is positive (sigmoid output)
    """
    features = extract_features(tweet, freqs)
    probability = sigmoid(np.dot(features, theta))
    return probability
# Training (one-vs-all) — kept commented for reference; the trained thetas
# are loaded back from 'all_theta.csv' below instead of being refit.
# print("Entrenando")
# for i in range(k):
#     progress(i + 1, k)
#     tmp_y = np.array(y == i, dtype=int)
#     optTheta = logisticRegression(X, tmp_y, np.zeros((n, 1))).x
#     all_theta[i] = optTheta
#
# # Save the theta vectors as a CSV file
# predictores = pd.DataFrame(all_theta)
# predictores.to_csv('all_theta.csv', index=False)

# Predictions for each digit: pick the class with the highest sigmoid score.
p = np.zeros((m, 1))
for i in range(m):
    p[i] = np.argmax(sigmoid(X[i, :].dot(all_theta.T)))

s = sum([1 if p[i] == y[i] else 0 for i in range(m)])
print("\n\n")
print("Train Accuracy ", (s / m) * 100, " %")

plt.ion()   # interactive mode
plt.gray()
fig = plt.figure()
out = "y"

# FIX: DataFrame.as_matrix() was removed in pandas 1.0 — use to_numpy().
all_theta = pd.read_csv('all_theta.csv').to_numpy()
print(all_theta.shape)
data = pd.read_csv("ex2data1.csv", names = ['test1', 'test2', 'condicion'])

# Prepare the data: design matrix with a leading column of ones (intercept).
# FIX: derive the sample count from the data instead of hard-coding 100.
m = len(data)
X = np.ones((m, 3))
y = np.ones((m, 1))
X[:, 1] = data['test1'].values
X[:, 2] = data['test2'].values
y[:, 0] = data['condicion'].values

initial_theta = np.zeros((3, 1))
optimized_theta = logisticRegression(X, y, initial_theta).x

# Predictions for the input data (threshold the sigmoid at 0.5).
p = sigmoid(X.dot(optimized_theta)) >= 0.5

# Number of successful predictions.
s = sum([1 if p[i] == y[i] else 0 for i in range(m)])
print("Train Accuracy ", (s / m) * 100 , " %")

prob_test = sigmoid(np.array([1, 45, 85]).dot(optimized_theta))
print('For a student with scores 45 and 85, we predict an admission probability of ', prob_test)

# Plot the data. NOTE: the comprehension variable is named `row` so it no
# longer shadows the label vector `y` defined above.
admitidos = [row for row in data.values if row[2] == 1.]
rechazados = [row for row in data.values if row[2] == 0.]
plt.scatter([row[0] for row in admitidos], [row[1] for row in admitidos],
            s = 60, c = 'blue', marker='+', label = 'admitido')
plt.scatter([row[0] for row in rechazados], [row[1] for row in rechazados],
            s = 60, c = 'red', marker='o', label = 'rechazado')
plt.xlim(30, 100)
plt.ylim(30, 100)
plt.xlabel('Exam 1 Score')
all_theta = np.zeros((k, n + 1))

# Training — one-vs-all: fit one binary classifier per species.
# FIX: use enumerate instead of a manually incremented counter.
print("Entrenando")
for i, flor in enumerate(Species):
    progress(i + 1, k)
    tmp_y = np.array(y_train == flor, dtype=int)  # 1 for this species, else 0
    all_theta[i] = logisticRegression(X_train, tmp_y, np.zeros((n + 1, 1))).x

# Predictions: each test row gets the species with the highest score.
P = sigmoid(X_test.dot(all_theta.T))
p = [Species[np.argmax(P[i, :])] for i in range(X_test.shape[0])]
s = sum(np.array(p == y_test, dtype=int))
print("\n\n")
print("Test Accuracy ", (s / X_test.shape[0]) * 100, "%")

# Confusion matrix of predicted vs. true species.
cfm = confusion_matrix(y_test, p, labels=Species)
sb.heatmap(cfm, annot=True, xticklabels=Species, yticklabels=Species)
plt.show()