def fit(self, X, y, n_iterations=4000):
    # Add dummy ones for bias weights
    X = np.insert(X, 0, 1, axis=1)
    n_samples, n_features = np.shape(X)

    # Initial parameters between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.param = (b - a) * np.random.random((n_features,)) + a

    # Tune parameters for n iterations
    for i in range(n_iterations):
        # Make a new prediction
        y_pred = sigmoid(X.dot(self.param))
        if self.gradient_descent:
            # Move against the gradient of the loss function with
            # respect to the parameters to minimize the loss
            self.param -= self.learning_rate * X.T.dot(y_pred - y)
        else:
            # Make a diagonal matrix of the sigmoid gradient column vector
            diag_gradient = make_diagonal(sigmoid_gradient(X.dot(self.param)))
            # Batch opt:
            self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(
                X.T).dot(diag_gradient.dot(X).dot(self.param) + y - y_pred)
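# Minimal sketches of the helpers the fit() versions in this file rely on
# (sigmoid, sigmoid_gradient, make_diagonal). Their actual definitions are
# not shown here and may differ; these only capture the behavior the code
# above assumes.
import math
import numpy as np

def sigmoid(x):
    # Logistic function, applied element-wise
    return 1 / (1 + np.exp(-x))

def sigmoid_gradient(x):
    # Derivative of the logistic function: sigma(x) * (1 - sigma(x))
    s = sigmoid(x)
    return s * (1 - s)

def make_diagonal(x):
    # Expand a 1-D vector into a square matrix with x on the diagonal
    return np.diag(x)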
def fit(self, X, y, n_iterations=4):
    X_train = np.array(X, dtype=float)
    # Add one to take bias weights into consideration
    X_train = np.insert(X_train, 0, 1, axis=1)
    y_train = np.atleast_1d(y)
    n_features = np.shape(X_train)[1]

    # Initial parameters between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.param = (b - a) * np.random.random((n_features,)) + a

    # Tune parameters for n iterations
    for i in range(n_iterations):
        # Make a new prediction
        dot = X_train.dot(self.param)
        y_pred = sigmoid(dot)
        # Make a diagonal matrix of the sigmoid gradient column vector
        diag_gradient = make_diagonal(sigmoid_gradient(dot))
        # Batch opt:
        # param = (X^T * R * X)^-1 * X^T * (R * X * param + y - y_pred),
        # where R = diag(sigm * (1 - sigm))
        self.param = np.linalg.pinv(
            X_train.T.dot(diag_gradient).dot(X_train)).dot(X_train.T).dot(
                diag_gradient.dot(X_train).dot(self.param) + y_train - y_pred)
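# Derivation sketch for the "Batch opt" step above (standard Newton/IRLS
# reasoning, not taken from this file): with R = diag(sigmoid'(X * param)),
# Newton's method on the log-likelihood updates
#     param <- param + (X^T R X)^-1 X^T (y - y_pred).
# Writing param = (X^T R X)^-1 (X^T R X) param and factoring out
# (X^T R X)^-1 X^T gives the combined form computed in the code:
#     param <- (X^T R X)^-1 X^T (R X param + y - y_pred).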
def fit(self, X, y, n_iterations=1000):
    self._initialize_parameters(X)
    # Tune parameters for n iterations
    for i in range(n_iterations):
        # Make a new prediction
        y_pred = self.sigmoid(X.dot(self.param))
        if self.gradient_descent:
            # Move against the gradient of the loss function with
            # respect to the parameters to minimize the loss
            self.param -= self.learning_rate * (y_pred - y).dot(X)
        else:
            # Make a diagonal matrix of the sigmoid gradient column vector
            diag_gradient = make_diagonal(
                self.sigmoid.gradient(X.dot(self.param)))
            # Batch opt:
            self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(
                X.T).dot(diag_gradient.dot(X).dot(self.param) + y - y_pred)
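# A plausible sketch of the _initialize_parameters helper called above; its
# definition is not shown in this file, so this mirrors the inline
# initialization the earlier versions use (uniform in [-1/sqrt(N), 1/sqrt(N)]).
def _initialize_parameters(self, X):
    n_features = np.shape(X)[1]
    # Initialize parameters between [-1/sqrt(N), 1/sqrt(N)]
    limit = 1 / math.sqrt(n_features)
    self.param = np.random.uniform(-limit, limit, (n_features,))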
def fit(self, X, y, n_iterations=4):
    # Add dummy ones for bias weights
    X = np.insert(X, 0, 1, axis=1)
    n_samples, n_features = np.shape(X)

    # Initial parameters between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.param = (b - a) * np.random.random((n_features,)) + a

    # Tune parameters for n iterations
    for i in range(n_iterations):
        # Make a new prediction
        y_pred = sigmoid(X.dot(self.param))
        # Make a diagonal matrix of the sigmoid gradient column vector
        diag_gradient = make_diagonal(sigmoid_gradient(X.dot(self.param)))
        # Batch opt:
        self.param = np.linalg.pinv(X.T.dot(diag_gradient).dot(X)).dot(
            X.T).dot(diag_gradient.dot(X).dot(self.param) + y - y_pred)
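# Standalone demo (all names local; nothing reused from the methods above) of
# why n_iterations=4 can suffice here: the Newton/IRLS update typically
# converges in a handful of steps on a well-conditioned problem.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_demo = np.insert(rng.randn(200, 2), 0, 1, axis=1)  # add bias column
    true_param = np.array([0.5, 2.0, -1.0])
    # Sample Bernoulli labels from the true logistic model
    p = 1 / (1 + np.exp(-X_demo.dot(true_param)))
    y_demo = (p > rng.rand(200)).astype(float)

    param = np.zeros(X_demo.shape[1])
    for i in range(4):
        y_pred = 1 / (1 + np.exp(-X_demo.dot(param)))
        R = np.diag(y_pred * (1 - y_pred))  # sigmoid gradient on the diagonal
        param = np.linalg.pinv(X_demo.T.dot(R).dot(X_demo)).dot(X_demo.T).dot(
            R.dot(X_demo).dot(param) + y_demo - y_pred)
        print(i, param)  # estimates stabilize within a few iterations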