def main():
    """Classify Salammbo chapters as English vs. French.

    Loads the English and French (letter count, count of 'a') datasets,
    optionally normalizes them, builds one feature matrix whose rows are
    [class_label, x0, x1, y] (label 0 = English, 1 = French), and passes
    it to test_classification().
    """
    X_en, y_en = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv'
    )
    X_fr, y_fr = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv'
    )
    X_en = np.array(X_en)
    X_fr = np.array(X_fr)
    # Targets as column vectors.
    y_en = np.array([y_en]).T
    y_fr = np.array([y_fr]).T

    normalize1 = False
    if normalize1:
        X_en, X_max = normalize(X_en)
        y_en, y_max = normalize(y_en)
        X_fr, X_max = normalize(X_fr)
        y_fr, y_max = normalize(y_fr)

    # Build the labeled rows by iterating over the arrays themselves rather
    # than hard-coding 15 rows per language: stays correct for any dataset
    # size. (Also dropped an unused w_init local that was never read.)
    features = []
    for label, X, y in ((0, X_en, y_en), (1, X_fr, y_fr)):
        for row, target in zip(X, y):
            features.append([label, row[0], row[1], target[0]])
    features = np.array(features)

    test_classification(features, normalize1)
# NOTE(review): this chunk begins mid-method -- the enclosing `def`/class
# header (a perceptron/logistic-style classifier, judging by self.weights and
# self.learning_rate) is not visible here, so the original indentation cannot
# be recovered; confirm placement against the full file.
wrong_predictions = 0
for input, label in zip([item[1:] for item in training_input],
                        [item[0] for item in training_input]):
    prediction = self.predict(input)
    if prediction != label:
        wrong_predictions += 1
    if label == 0 and prediction == 1:
        # False positive: move the weights away from this input and
        # decrement the bias term (weights[0]).
        self.weights[1:] = vector.sub(self.weights[1:],
                                      vector.mul(self.learning_rate, input))
        self.weights[0] -= self.learning_rate
    if label == 1 and prediction == 0:
        # False negative: move the weights toward this input and
        # increment the bias term.
        self.weights[1:] = vector.add(vector.mul(self.learning_rate, input),
                                      self.weights[1:])
        self.weights[0] += self.learning_rate

# NOTE(review): method name shadows the builtin print.
def print(self):
    print("Weights for logistic: " + str(self.weights))

# Module-level script: load both Salammbo datasets, concatenate French after
# English, normalize, and dump the data in libsvm format.
X_en, y_en = datasets.load_tsv('https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv')
X_fr, y_fr = datasets.load_tsv('https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv')
X_en.extend(X_fr)
y_en.extend(y_fr)
X_en, maxima_X_en = normalize(X_en)
# Keep only the second feature column of each normalized row.
X_en = list(x[1] for x in X_en)
maxima_y_en = max(y_en)
y_en = [yi / maxima_y_en for yi in y_en]
# Maxima retained so predictions can be rescaled to original units.
maxima = [maxima_X_en[1]] + [maxima_y_en]
# Create (truncate) the libsvm format file before appending both classes.
f = open('libsvm_format.txt', "w+")
f.close()
# NOTE(review): slicing at 15 assumes exactly 15 English rows -- presumably
# the 15 Salammbo chapters; confirm against the dataset.
libsvmreader(X_en[:15], y_en[:15], 1, 'libsvm_format.txt')
libsvmreader(X_en[15:], y_en[15:], 0, 'libsvm_format.txt')
def _fit_and_report(title, X, y, w_init, alpha, do_normalize):
    """Fit one language's data with stochastic and with batch descent.

    Prints the raw weights from both solvers and, when do_normalize is
    true, the weights rescaled back to original units. Returns the pair
    (w_stochastic, w_batch) as finally printed.
    """
    maxima = None
    if do_normalize:
        X, X_max = normalize(X)
        y, y_max = normalize(y)
        # Stack feature and target maxima so weights can be rescaled below.
        maxima = np.concatenate((X_max, y_max)).reshape(-1, 1)
        alpha = 1  # normalized data tolerates a much larger learning rate
    w_s = stochastic(X, y, alpha, w_init)
    print(title, '\n')
    print("Stochastic weights: ", w_s)
    if do_normalize:
        # Undo the normalization: scale weights back to original units.
        w_s = maxima[-1, 0] * (w_s / maxima[:-1, 0:1])
        print("Restored stochastic weights", w_s)
    w_b = batch(X, y, alpha, w_init)
    print("Batch weights: ", w_b)
    if do_normalize:
        w_b = maxima[-1, 0] * (w_b / maxima[:-1, 0:1])
        print("Restored batch weights", w_b)
    return w_s, w_b


def main():
    """Linear regression on the Salammbo letter counts.

    Fits stochastic and batch gradient descent on the English and the
    French data (via _fit_and_report, which removes the former copy/paste
    duplication between the two languages) and plots the four fitted
    lines over the raw observations.
    """
    X_en, y_en = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv'
    )
    X_fr, y_fr = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_fr.tsv'
    )
    X_en = np.array(X_en)
    X_fr = np.array(X_fr)
    # Targets as column vectors.
    y_en = np.array([y_en]).T
    y_fr = np.array([y_fr]).T
    # Keep unnormalized copies for plotting the raw observations.
    X_org_en = X_en
    X_org_fr = X_fr
    y_org_en = y_en
    y_org_fr = y_fr

    alpha = 1.0e-11  # tiny rate needed if fitting raw (unnormalized) data
    normalize1 = True
    w = np.zeros(X_en.shape[1]).reshape((-1, 1))
    print(w)

    w_en_s, w_en_b = _fit_and_report("ENGLISH SALAMMBO", X_en, y_en, w,
                                     alpha, normalize1)
    print('\n')
    w_fr_s, w_fr_b = _fit_and_report("FRENCH SALAMMBO", X_fr, y_fr, w,
                                     alpha, normalize1)

    # PLOT: raw observations plus the four fitted regression lines.
    x = np.linspace(0, 80000, 100)
    y1 = w_en_b[1] * x + w_en_b[0]
    y2 = w_fr_b[1] * x + w_fr_b[0]
    y3 = w_en_s[1] * x + w_en_s[0]
    y4 = w_fr_s[1] * x + w_fr_s[0]
    plt.plot(X_org_en[:, 1], y_org_en, 'bs')
    plt.plot(X_org_fr[:, 1], y_org_fr, 'ro')
    line_1, = plt.plot(x, y1, label="English Batch")
    line_2, = plt.plot(x, y2, label="French Batch")
    line_3, = plt.plot(x, y3, label="English Stochastic")
    line_4, = plt.plot(x, y4, label="French Stochastic")
    plt.legend(handles=[line_1, line_2, line_3, line_4])
    plt.show()
# NOTE(review): this chunk begins mid-function -- the `def` header of this
# gradient-descent routine is not visible here, so the indentation of the
# fragment below is reconstructed; confirm against the full file.
    for epoch in range(1, 1000):
        # Residual between targets and current predictions: y - Xw.
        loss = vector.sub(y, vector.mul_mat_vec(X, w))
        # Gradient direction for minimizing SSE: X^T (y - Xw).
        gradient = vector.mul_mat_vec(vector.transpose(X), loss)
        w_old = w
        w = vector.add(w, vector.mul(alpha, gradient))
        # NOTE(review): if `logs` is a list, `+=` with a tuple extends it
        # with three flat items per epoch rather than appending one
        # (w, alpha, sse) record -- confirm this flattening is intended.
        logs += (w, alpha, sse(X, y, w))
        # Stop when the relative change of the weight vector is tiny.
        if vector.norm(vector.sub(w, w_old)) / vector.norm(w) < 1.0e-5:
            break
    print("Epoch", epoch)
    return w


if __name__ == '__main__':
    normalized = True
    debug = False
    X, y = datasets.load_tsv(
        'https://raw.githubusercontent.com/pnugues/ilppp/master/programs/ch04/salammbo/salammbo_a_en.tsv')
    alpha = 1.0e-10
    if normalized:
        X, maxima_X = normalize(X)
        maxima_y = max(y)
        y = [yi / maxima_y for yi in y]
        # Maxima retained so fitted weights can be rescaled afterwards.
        maxima = maxima_X + [maxima_y]
        alpha = 1.0
        print("-Normalized-")
    print("===Batch descent===")
    # NOTE(review): the weight vector is sized by the number of rows,
    # len(X); for a design matrix the usual size is the number of columns,
    # len(X[0]) -- verify against what normalize()/load_tsv return.
    w = [0.0] * (len(X))
    w = batch_descent(X, y, alpha, w)
    print("Weights", w)
    print("SSE", sse(X, y, w))