def test_020_matmul_build_specification():
    """
    Objective:
        Verify Matmul.specification() generates the expected specification.
    Expected:
        The generated dictionary equals the hand-assembled one, which uses
        the "he" weight scheme and SGD.specification(name="sgd") as optimizer.
    """
    layer_name = "matmul01"
    node_count = 8
    feature_count = 2   # NOT including bias
    scheme = "he"

    # Hand-assembled specification that Matmul.specification() must reproduce.
    expected_spec = {
        _SCHEME: Matmul.class_id(),
        _PARAMETERS: {
            _NAME: layer_name,
            _NUM_NODES: node_count,
            _NUM_FEATURES: feature_count,
            _WEIGHTS: {_SCHEME: scheme},
            _OPTIMIZER: SGD.specification(name="sgd"),
        },
    }

    actual_spec = Matmul.specification(
        name=layer_name,
        num_nodes=node_count,
        num_features=feature_count,
        weights_initialization_scheme=scheme,
    )

    assert expected_spec == actual_spec, \
        "expected\n%s\nactual\n%s\n" % (expected_spec, actual_spec)
def _instantiate(name: str, num_nodes: int, num_features: int, objective=None):
    """Create a Matmul layer whose weights come from a randomly chosen scheme.

    A uniform random draw selects the initializer: he (< 0.3),
    xavier (< 0.7), otherwise uniform.

    Args:
        name: layer name
        num_nodes: number of nodes in the layer
        num_features: number of features NOT including the bias
        objective: objective function to set on the layer when not None
    Returns:
        The Matmul instance
    """
    draw = TYPE_FLOAT(np.random.uniform())
    if draw < 0.3:
        initializer = weights.he
    elif draw < 0.7:
        initializer = weights.xavier
    else:
        initializer = weights.uniform

    # One extra column is allocated for the bias.
    layer = Matmul(
        name=name,
        num_nodes=num_nodes,
        W=initializer(num_nodes, num_features + 1)
    )
    if objective is not None:
        layer.objective = objective
    return layer
def test_020_matmul_instantiation():
    """
    Objective:
        Verify the initialized layer instance provides its properties.
    Expected:
        * name, num_nodes, M, log_level are the same as initialized.
        * X, T, dX, objective returns what is set.
        * N, M property are provided after X is set.
        * Y, dY properties are provided after they are set.
    """
    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function"""
        return np.sum(X)

    name = "test_020_matmul_instantiation"
    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)

        layer = Matmul(
            name=name,
            num_nodes=M,
            W=weights.he(M, D + 1),
            log_level=logging.DEBUG
        )
        layer.objective = objective

        # Properties fixed at construction time.
        assert layer.name == name
        assert layer.num_nodes == layer.M == M

        # D is injected through the private attribute and read via the property.
        layer._D = D
        assert layer.D == D

        # X and the batch size N derived from it.
        batch = np.random.randn(N, D).astype(TYPE_FLOAT)
        layer.X = batch
        assert np.array_equal(layer.X, batch)
        assert layer.N == N == batch.shape[0]

        layer._dX = batch
        assert np.array_equal(layer.dX, batch)

        # Labels
        labels = np.random.randint(0, M, N).astype(TYPE_LABEL)
        layer.T = labels
        assert np.array_equal(layer.T, labels)

        # Y and dY are injected through the private attributes.
        gram = np.dot(batch, batch.T)
        layer._Y = gram
        assert np.array_equal(layer.Y, gram)

        layer._dY = np.array(0.9)
        assert layer._dY == np.array(0.9)

        layer.logger.debug("This is a pytest")

        assert layer.objective == objective
def test_020_matmul_instantiation_to_fail():
    """
    Objective:
        Verify the layer class validates the initialization parameter constraints.
    Expected:
        Initialization detects parameter constraints not meet and fails.

    Each case invalidates exactly one argument of an otherwise valid
    constructor call, so a failure can be attributed to that argument.
    """
    name = "test_020_matmul_instantiation_to_fail"
    for _ in range(NUM_MAX_TEST_TIMES):
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D = 1

        # Constraint: Name is string with length > 0.
        # num_nodes matches W so that only the name is invalid.
        try:
            Matmul(
                name="",
                num_nodes=M,
                W=weights.xavier(M, D + 1)
            )
            raise RuntimeError(
                "Matmul initialization with invalid name must fail")
        except AssertionError:
            pass

        # Constraint: num_nodes >= 1
        try:
            Matmul(
                name=name,
                num_nodes=0,
                W=weights.xavier(M, D + 1)
            )
            raise RuntimeError("Matmul(num_nodes<1) must fail.")
        except AssertionError:
            pass

        # Constraint: logging level is correct.
        try:
            Matmul(
                name=name,
                num_nodes=M,
                W=weights.xavier(M, D + 1),
                log_level=-1
            )
            raise RuntimeError(
                "Matmul initialization with invalid log level must fail")
        except (AssertionError, KeyError):
            pass

        # Constraint: the number of rows in W matches num_nodes.
        # A valid name is used here; with an empty name the constructor
        # could fail on the name check and never reach the shape check.
        try:
            Matmul(
                name=name,
                num_nodes=1,
                W=weights.xavier(2, D + 1)
            )
            raise RuntimeError(
                "Matmul initialization with W.shape[0] != num_nodes must fail")
        except AssertionError:
            pass
def test_020_matmul_builder_to_fail_weight_spec():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parse the spec and fail with invalid weight configurations
    """
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        for _ in range(NUM_MAX_TEST_TIMES):
            M = np.random.randint(1, 100)
            D = np.random.randint(1, 100)   # NOT including bias

            # ----------------------------------------------------------------------
            # Validate the correct specification.
            # NOTE: Invalidate one parameter at a time from the correct one.
            # Otherwise not sure what you are testing.
            # ----------------------------------------------------------------------
            valid_matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he"
                }
            }
            try:
                Matmul.build(valid_matmul_spec)
            except Exception as e:
                # Chain the cause so the underlying build error stays visible.
                raise RuntimeError(
                    "Matmul.build() must succeed with %s" % valid_matmul_spec
                ) from e

            # Invalidate only the weight initialization scheme.
            matmul_spec = copy.deepcopy(valid_matmul_spec)
            matmul_spec[_WEIGHTS][_SCHEME] = "invalid_scheme"
            try:
                Matmul.build(matmul_spec)
                raise RuntimeError(
                    "Matmul.build() must fail with invalid weight scheme")
            except AssertionError:
                pass
    finally:
        # Always release the profiler so that a failure here does not leave
        # it active for the tests that run afterwards.
        profiler.disable()

    profiler.print_stats(sort="cumtime")
def output(m, d):
    """Build the specifications of the output stage: Matmul followed by loss.

    Args:
        m: number of nodes of the matmul and loss layers
        d: number of features given to the matmul layer
    Returns:
        Dictionary with the "matmul" and "loss" layer specifications
    """
    matmul_spec = Matmul.specification(
        name="matmul",
        num_nodes=m,
        num_features=d,
        weights_initialization_scheme="he",
        weights_optimizer_specification=optimizer.SGD.specification(
            lr=0.05,
            l2=1e-3
        )
    )
    loss_spec = CrossEntropyLogLoss.specification(name="loss", num_nodes=m)
    return {"matmul": matmul_spec, "loss": loss_spec}
def test():
    """Train a one-node sigmoid classifier and check its prediction is a label.

    A SequentialNetwork of Matmul + CrossEntropyLogLoss (sigmoid log loss) is
    built from a specification, trained for 50 iterations on the data from
    linear_separable(), then asked to predict the point (-1, -1).
    The prediction must consist only of the labels 0 or 1.
    """
    M = 1
    D = 2
    N = 100
    X, T, _ = linear_separable(d=D, n=N)

    sigmoid_classifier_specification = {
        _NAME: "softmax_classifier",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: {
            "matmul01": Matmul.specification(
                name="matmul",
                num_nodes=M,
                num_features=D,
                weights_initialization_scheme="he",
                weights_optimizer_specification=SGD.specification(
                    lr=TYPE_FLOAT(0.2),
                    l2=TYPE_FLOAT(1e-3)
                )
            ),
            "loss": CrossEntropyLogLoss.specification(
                name="loss",
                num_nodes=M,
                loss_function=sigmoid_cross_entropy_log_loss.__qualname__
            )
        }
    }
    logistic_classifier = SequentialNetwork.build(
        specification=sigmoid_classifier_specification,
    )

    for _ in range(50):
        logistic_classifier.train(X=X, T=T)

    prediction = logistic_classifier.predict(
        np.array([-1., -1.], dtype=TYPE_FLOAT))

    # The membership check was previously evaluated and thrown away;
    # assert it so that a non-label prediction actually fails the test.
    assert np.all(np.isin(prediction, [0, 1])), \
        "prediction must be 0 or 1 but %s" % prediction
    print(prediction)
def inference(index: int, m: int, d: int) -> Dict[str, dict]:
    """Build matmul-bn-activation specifications

    The activation specification is ReLU when the name `activation`, which
    is resolved from the enclosing scope, equals ReLU.class_id(), and
    Sigmoid otherwise.

    Args:
        index: stack position in the network
        m: number of outputs (== number of nodes)
        d: number of features in the input
    Returns:
        Dictionary of the three layer specifications keyed by layer name
    """
    def sgd_specification() -> dict:
        """Create a fresh SGD specification for each consumer."""
        return optimizer.SGD.specification(lr=0.05, l2=1e-3)

    matmul_name = f"matmul{index:03d}"
    bn_name = f"bn{index:03d}"

    matmul_spec = Matmul.specification(
        name=matmul_name,
        num_nodes=m,
        num_features=d,
        weights_initialization_scheme="he",
        weights_optimizer_specification=sgd_specification()
    )
    bn_spec = BatchNormalization.specification(
        name=bn_name,
        num_nodes=m,
        gamma_optimizer_specification=sgd_specification(),
        beta_optimizer_specification=sgd_specification(),
        momentum=0.9
    )
    if activation == ReLU.class_id():
        activation_spec = ReLU.specification(
            name=f"relu{index:03d}",
            num_nodes=m,
        )
    else:
        activation_spec = Sigmoid.specification(
            name=f"sigmoid{index:03d}",
            num_nodes=m,
        )

    return {
        matmul_name: matmul_spec,
        bn_name: bn_spec,
        f"activation{index:03d}": activation_spec,
    }
def train_matmul_bn_relu_classifier(
        N: int,
        D: int,
        M: int,
        X: np.ndarray,
        T: np.ndarray,
        W: np.ndarray,
        log_loss_function: Callable,
        optimizer: Optimizer,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        log_level: int = logging.ERROR,
        callback: Callable = None
):
    """Train a matmul -> batch normalization -> ReLU -> log loss classifier.

    The layers are wired by hand (not via a network class) and trained with
    a forward path, a backward path and a gradient descent per epoch.

    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. Must be >= 2 as only the softmax log loss is accepted.
        X: train data of shape (N, D)
        T: labels of shape (N,) with an integer dtype
        W: weight of shape (M, D+1)
        log_loss_function: cross entropy log loss function.
            Must be softmax_cross_entropy_log_loss.
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag if test the analytical gradient with the numerical one.
        log_level: logging level
        callback: callback function to invoke at the each epoch end.
    Returns:
        (matmul.W, objective, prediction) where objective is the composed
        network objective function and prediction is the composition of the
        predict functions of the matmul, bn and activation layers.
    Raises:
        AssertionError: an argument constraint is not met, the network
            objective does not match the layer-by-layer loss, or W was not
            updated by the gradient descent.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(
        T.dtype, np.integer) and T.ndim == 1 and T.shape[0] == N
    assert isinstance(
        X, np.ndarray) and X.dtype == TYPE_FLOAT and X.ndim == 2 and X.shape[
        0] == N and X.shape[1] == D
    assert isinstance(
        W, np.ndarray) and W.dtype == TYPE_FLOAT and W.ndim == 2 and W.shape[
        0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0
    assert (log_loss_function == softmax_cross_entropy_log_loss and M >= 2)

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss: CrossEntropyLogLoss = CrossEntropyLogLoss(
        name="loss",
        num_nodes=M,
        log_loss_function=log_loss_function,
        log_level=log_level)

    # --------------------------------------------------------------------------------
    # Instantiate a ReLU layer
    # --------------------------------------------------------------------------------
    activation: ReLU = ReLU(name="relu", num_nodes=M, log_level=log_level)
    activation.objective = loss.function

    # --------------------------------------------------------------------------------
    # Instantiate a BatchNormalization layer
    # NOTE(review): it is named with the module name and logs at WARNING
    # regardless of the log_level argument -- confirm this is intended.
    # --------------------------------------------------------------------------------
    bn: BatchNormalization = BatchNormalization(name=name,
                                                num_nodes=M,
                                                log_level=logging.WARNING)
    bn.objective = compose(activation.function, activation.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul: Matmul = Matmul(name="matmul",
                            num_nodes=M,
                            W=W,
                            optimizer=optimizer,
                            log_level=log_level)
    matmul.objective = compose(bn.function, bn.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Normalization layer
    # Need to apply the same mean and std to the non-training data set.
    # --------------------------------------------------------------------------------
    # norm = Standardization(
    #     name="standardization",
    #     num_nodes=M,
    #     log_level=log_level
    # )
    # X = np.copy(X)
    # X = norm.function(X)

    # Network objective function f: L=f(X)
    objective = compose(matmul.function, matmul.objective)
    prediction = compose(matmul.predict, bn.predict, activation.predict)

    num_no_progress: int = 0    # how many time when loss L not decreased.
    loss.T = T
    # The history starts with the loss before any training.
    # pylint: disable=not-callable
    history: List[np.ndarray] = [matmul.objective(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # 1. Calculate the matmul output Y=matmul.f(X)
        # 2. Calculate the batch normalization output BN=bn.f(Y)
        # 3. Calculate the ReLU output A=activation.f(BN)
        # 4. Calculate the loss L = loss(A)
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        BN = bn.function(Y)
        A = activation.function(BN)
        L = loss.function(A)

        # ********************************************************************************
        # Constraint: Network objective L must match layer-by-layer output
        # ********************************************************************************
        # pylint: disable=not-callable
        assert L == objective(X) and L.shape == (), \
            f"Network objective L(X) %s must match layer-by-layer output %s." \
            % (objective(X), L)

        if not (i % 10):
            print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        if L >= history[-1] and i > 0:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                i, L, history[-1], num_no_progress + 1)
            # --------------------------------------------------------------------------------
            # Reduce the learning rate can make the situation worse.
            # When reduced the lr every time L >= history, the (L >= history) became successive
            # and eventually exceeded 50 successive non-improvement ending in failure.
            # Keep the learning rate make the L>=history more frequent but still up to 3
            # successive events, and the training still kept progressing.
            # --------------------------------------------------------------------------------
            num_no_progress += 1
            if num_no_progress > 5:
                matmul.lr = matmul.lr * TYPE_FLOAT(0.95)

            if num_no_progress > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                break
        else:
            num_no_progress = 0

        history.append(L)

        # ================================================================================
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dX.
        # ================================================================================
        before = copy.deepcopy(matmul.W)
        dA = loss.gradient(TYPE_FLOAT(1))   # dL/dA
        dBN = activation.gradient(dA)       # dL/dBN
        dY = bn.gradient(dBN)               # dL/dY
        dX = matmul.gradient(dY)            # dL/dX

        # gradient descent and get the analytical gradients
        bn.update()
        # presumably dS holds the matmul gradients other than dL/dX, as dL/dX
        # is prepended to it below -- verify against Matmul.update()
        dS = matmul.update()

        # ********************************************************************************
        # Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradient
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)  # prepend dL/dX

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            callback(W=matmul.W)

    return matmul.W, objective, prediction
def disabled_test_020_matmul_round_trip():
    """
    TODO:
        Disabled as need to re-design numerical_jacobian for 32 bit float e.g TF.

    Objective:
        Verify the forward and backward paths at matmul.

    Expected:
        Forward path:
        1. Matmul function(X) == X @ W.T
        2. Numerical gradient should be the same with numerical Jacobian

        Backward path:
        3. Analytical gradient dL/dX == dY @ W
        4. Analytical dL/dW == X.T @ dY
        5. Analytical gradients are similar to the numerical gradient ones

        Gradient descent
        6. W is updated via the gradient descent.
        7. Objective L is decreasing via the gradient descent.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        for _ in range(NUM_MAX_TEST_TIMES):
            # --------------------------------------------------------------------------------
            # Instantiate a Matmul layer
            # --------------------------------------------------------------------------------
            N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
            M: int = np.random.randint(1, NUM_MAX_NODES)
            D: int = np.random.randint(1, NUM_MAX_FEATURES)
            W = weights.he(M, D + 1)
            name = "test_020_matmul_methods"

            def objective(X: np.ndarray) -> Union[float, np.ndarray]:
                """Dummy objective function to calculate the loss L"""
                return np.sum(X)

            # Test both static instantiation and build()
            if TYPE_FLOAT(np.random.uniform()) < 0.5:
                matmul = Matmul(
                    name=name,
                    num_nodes=M,
                    W=W,
                    log_level=logging.DEBUG
                )
            else:
                matmul_spec = {
                    _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                    _NUM_NODES: M,
                    _NUM_FEATURES: D,
                    _WEIGHTS: {
                        _SCHEME: "he",
                    },
                    _OPTIMIZER: {
                        _SCHEME: "sGd"
                    }
                }
                matmul = Matmul.build(matmul_spec)

            matmul.objective = objective

            # ================================================================================
            # Layer forward path
            # Calculate the layer output Y=f(X), and get the loss L = objective(Y)
            # Test the numerical gradient dL/dX=matmul.gradient_numerical().
            #
            # Note that bias columns are added inside the matmul layer instance, hence
            # matmul.X.shape is (N, 1+D), matmul.W.shape is (M, 1+D)
            # ================================================================================
            X = np.random.randn(N, D).astype(TYPE_FLOAT)
            Logger.debug("%s: X is \n%s", name, X)

            # pylint: disable=not-callable
            Y = matmul.function(X)
            # pylint: disable=not-callable
            L = matmul.objective(Y)

            # Constraint 1 : Matmul outputs Y should be X @ W.T
            assert np.array_equal(Y, np.matmul(matmul.X, matmul.W.T))

            # Constraint 2: Numerical gradient should be the same with numerical Jacobian
            GN = matmul.gradient_numerical()    # [dL/dX, dL/dW]

            # DO NOT use matmul.function() as the objective function for numerical_jacobian().
            # The state of the layer will be modified.
            # LX = lambda x: matmul.objective(matmul.function(x))
            def LX(x):
                y = np.matmul(x, matmul.W.T)
                # pylint: disable=not-callable
                return matmul.objective(y)

            EGNX = numerical_jacobian(LX, matmul.X)     # Numerical dL/dX including bias
            EGNX = EGNX[::, 1::]                        # Remove bias for dL/dX
            assert np.array_equal(GN[0], EGNX), \
                "GN[0]\n%s\nEGNX=\n%s\n" % (GN[0], EGNX)

            # DO NOT use matmul.function() as the objective function for numerical_jacobian().
            # The state of the layer will be modified.
            # LW = lambda w: matmul.objective(np.matmul(X, w.T))
            def LW(w):
                Y = np.matmul(matmul.X, w.T)
                # pylint: disable=not-callable
                return matmul.objective(Y)

            EGNW = numerical_jacobian(LW, matmul.W)     # Numerical dL/dW including bias
            assert np.array_equal(GN[1], EGNW)          # No need to remove bias

            # ================================================================================
            # Layer backward path
            # Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dummy dL/dY.
            # ================================================================================
            dY = np.ones_like(Y)
            dX = matmul.gradient(dY)

            # Constraint 3: Matmul gradient dL/dX should be dL/dY @ W. Use a dummy dL/dY = 1.0.
            expected_dX = np.matmul(dY, matmul.W)
            expected_dX = expected_dX[
                ::,
                1::     # Omit bias
            ]
            assert np.array_equal(dX, expected_dX)

            # Constraint 5: Analytical gradient dL/dX close to the numerical gradient GN.
            assert np.all(np.abs(dX - GN[0]) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
                "dX need close to GN[0]. dX:\n%s\ndiff \n%s\n" % (dX, dX-GN[0])

            # --------------------------------------------------------------------------------
            # Gradient update.
            # Run the gradient descent to update Wn+1 = Wn - lr * dL/dX.
            # --------------------------------------------------------------------------------
            # Back up the weights held by the layer, not the local W. When the
            # layer is created via build(), its weights are unrelated to the
            # local W and comparing against W would always pass.
            backup = copy.deepcopy(matmul.W)

            # Gradient descent and returns analytical dL/dX, dL/dW
            dS = matmul.update()
            dW = dS[0]

            # Constraint 6.: W has been updated by the gradient descent.
            assert np.any(backup != matmul.W), "W has not been updated "

            # Constraint 5: the numerical gradient (dL/dX, dL/dW) are closer to the analytical ones.
            assert validate_against_expected_gradient(GN[0], dX), \
                "dX=\n%s\nGN[0]=\n%sdiff=\n%s\n" % (dX, GN[0], (dX-GN[0]))
            assert validate_against_expected_gradient(GN[1], dW), \
                "dW=\n%s\nGN[1]=\n%sdiff=\n%s\n" % (dW, GN[1], (dW-GN[1]))

            # Constraint 7: gradient descent progressing with the new objective L(Yn+1) < L(Yn)
            # The comparison itself is the check; wrapping its boolean result
            # in np.abs() had no effect and is dropped.
            # pylint: disable=not-callable
            assert np.all(objective(matmul.function(X)) < L)
    finally:
        # Always release the profiler so that a failure here does not leave
        # it active for the tests that run afterwards.
        profiler.disable()

    profiler.print_stats(sort="cumtime")
def test_020_matmul_builder_to_succeed():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parse the spec and succeed.
        The optimizer of the built layer has the lr and l2 of the spec, and
        the optimizer scheme name is accepted in different letter cases.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        for _ in range(NUM_MAX_TEST_TIMES):
            M = np.random.randint(1, 100)
            D = np.random.randint(1, 100)   # NOT including bias

            # ----------------------------------------------------------------------
            # Validate the correct specification.
            # NOTE: Invalidate one parameter at a time from the correct one.
            # Otherwise not sure what you are testing.
            # ----------------------------------------------------------------------
            lr = TYPE_FLOAT(np.random.uniform())
            l2 = TYPE_FLOAT(np.random.uniform())
            valid_matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he",
                },
                _OPTIMIZER: {
                    _SCHEME: "sGd",
                    _PARAMETERS: {
                        "lr": lr,
                        "l2": l2
                    }
                }
            }
            try:
                matmul: Matmul = Matmul.build(valid_matmul_spec)
                assert matmul.optimizer.lr == lr
                assert matmul.optimizer.l2 == l2
            except Exception as e:
                raise RuntimeError(
                    "Matmul.build() must succeed with %s" % valid_matmul_spec
                ) from e

            # Build from the MODIFIED copy. Building valid_matmul_spec again
            # would leave the scheme name variants untested.
            for scheme in ("sgd", "SGD"):
                matmul_spec = copy.deepcopy(valid_matmul_spec)
                matmul_spec[_OPTIMIZER][_SCHEME] = scheme
                try:
                    Matmul.build(matmul_spec)
                except Exception as e:
                    raise RuntimeError(
                        "Matmul.build() must succeed with %s" % matmul_spec
                    ) from e
    finally:
        # Always release the profiler so that a failure here does not leave
        # it active for the tests that run afterwards.
        profiler.disable()

    profiler.print_stats(sort="cumtime")
def test_020_matmul_builder_to_fail_optimizer_spec():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parse the spec and fail with invalid configurations
    """
    def build_must_fail(spec: dict, message: str) -> None:
        """Verify Matmul.build(spec) fails with AssertionError."""
        try:
            Matmul.build(spec)
        except AssertionError:
            return
        raise RuntimeError(message)

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        for _ in range(NUM_MAX_TEST_TIMES):
            M = np.random.randint(1, 100)
            D = np.random.randint(1, 100)   # NOT including bias

            # ----------------------------------------------------------------------
            # Validate the correct specification.
            # NOTE: Invalidate one parameter at a time from the correct one.
            # Otherwise not sure what you are testing.
            # ----------------------------------------------------------------------
            valid_matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he"
                },
                _OPTIMIZER: {
                    _SCHEME: "sGd",
                    _PARAMETERS: {
                        "lr": TYPE_FLOAT(np.random.uniform()),
                        "l2": TYPE_FLOAT(np.random.uniform())
                    }
                },
                "log_level": logging.ERROR
            }
            try:
                Matmul.build(valid_matmul_spec)
            except Exception as e:
                # Chain the cause so the underlying build error stays visible.
                raise RuntimeError(
                    "Matmul.build() must succeed with %s" % valid_matmul_spec
                ) from e

            # Optimizer specification is not a dictionary.
            matmul_spec = copy.deepcopy(valid_matmul_spec)
            matmul_spec[_OPTIMIZER] = ""
            build_must_fail(
                matmul_spec,
                "Matmul.build() must fail with invalid optimizer spec")

            # Unknown optimizer scheme.
            matmul_spec = copy.deepcopy(valid_matmul_spec)
            matmul_spec[_OPTIMIZER][_SCHEME] = "invalid"
            build_must_fail(
                matmul_spec,
                "Matmul.build() must fail with invalid optimizer spec")

            # Negative learning rate.
            matmul_spec = copy.deepcopy(valid_matmul_spec)
            matmul_spec[_OPTIMIZER][_PARAMETERS]["lr"] = np.random.uniform(-1, 0)
            build_must_fail(
                matmul_spec,
                "Matmul.build() must fail with invalid lr value")

            # Negative L2 regularization.
            matmul_spec = copy.deepcopy(valid_matmul_spec)
            matmul_spec[_OPTIMIZER][_PARAMETERS]["l2"] = np.random.uniform(-1, 0)
            build_must_fail(
                matmul_spec,
                "Matmul.build() must fail with invalid l2 value")
    finally:
        # Always release the profiler so that a failure here does not leave
        # it active for the tests that run afterwards.
        profiler.disable()

    profiler.print_stats(sort="cumtime")
def test_020_matmul_instance_properties():
    """
    Objective:
        Verify the layer class validates the parameters have been initialized before accessed.
    Expected:
        Initialization detects the access to the non-initialized parameters and fails.
    """
    msg = "Accessing uninitialized property of the layer must fail."

    def must_fail(action: Callable) -> None:
        """Run the action and require it to raise AssertionError."""
        try:
            action()
        except AssertionError:
            return
        raise RuntimeError(msg)

    def inject_then_print(layer, private_name: str, public_name: str) -> None:
        """Set an int via the private attribute, then read the property."""
        setattr(layer, private_name, int(1))
        print(getattr(layer, public_name))

    for _ in range(NUM_MAX_TEST_TIMES):
        name = random_string(np.random.randint(1, 10))
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        matmul = Matmul(
            name=name,
            num_nodes=M,
            W=weights.uniform(M, D + 1),
            log_level=logging.DEBUG
        )

        # --------------------------------------------------------------------------------
        # To pass
        # --------------------------------------------------------------------------------
        try:
            if matmul.name != name:
                raise RuntimeError("matmul.name == name should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to name should be allowed as already initialized."
            ) from e

        try:
            if matmul.M != M:
                raise RuntimeError("matmul.M == M should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to M should be allowed as already initialized.") from e

        try:
            if not isinstance(matmul.logger, logging.Logger):
                raise RuntimeError(
                    "isinstance(matmul.logger, logging.Logger) should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to logger should be allowed as already initialized."
            ) from e

        # Reading these properties must not raise.
        try:
            _ = matmul.D
        except AssertionError:
            raise RuntimeError(
                "Access to D should be allowed as already initialized.")

        try:
            _ = matmul.W
        except AssertionError:
            raise RuntimeError(
                "Access to W should be allowed as already initialized.")

        try:
            _ = matmul.optimizer
        except AssertionError:
            raise RuntimeError(
                "Access to optimizer should be allowed as already initialized."
            )

        # --------------------------------------------------------------------------------
        # To fail
        # --------------------------------------------------------------------------------
        must_fail(lambda: print(matmul.X))
        must_fail(lambda: setattr(matmul, "X", int(1)))
        must_fail(lambda: print(matmul.dX))
        must_fail(lambda: print(matmul.dW))
        must_fail(lambda: print(matmul.Y))
        must_fail(lambda: inject_then_print(matmul, "_Y", "Y"))
        must_fail(lambda: print(matmul.dY))
        must_fail(lambda: inject_then_print(matmul, "_dY", "dY"))
        must_fail(lambda: print(matmul.T))
        must_fail(lambda: setattr(matmul, "T", float(1)))
        # pylint: disable=not-callable
        must_fail(lambda: matmul.objective(np.array(1.0, dtype=TYPE_FLOAT)))
        must_fail(lambda: print(matmul.N))

        assert matmul.name == name
        assert matmul.num_nodes == M

        # function() and gradient() must reject a non-array argument.
        try:
            matmul = Matmul(
                name=name,
                num_nodes=M,
                W=weights.xavier(M, D + 1),
                log_level=logging.DEBUG
            )
            matmul.function(int(1))
        except AssertionError:
            pass
        else:
            raise RuntimeError("Invoke matmul.function(int(1)) must fail.")

        try:
            matmul = Matmul(
                name=name,
                num_nodes=M,
                W=weights.xavier(M, D + 1),
                log_level=logging.DEBUG
            )
            matmul.gradient(int(1))
        except AssertionError:
            pass
        else:
            raise RuntimeError("Invoke matmul.gradient(int(1)) must fail.")
def train_binary_classifier(
        N: int,
        D: int,
        M: int,
        X: np.ndarray,
        T: np.ndarray,
        W: np.ndarray,
        log_loss_function: Callable,
        optimizer: Optimizer,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        log_level: int = logging.ERROR,
        callback: Callable = None
):
    """Test case for binary classification with matmul + log loss.

    Each epoch runs the forward path, calculates the expected gradients
    from the layer state, runs the backward path and the gradient descent,
    then compares the actual gradients with the expected ones. A gradient
    mismatch is logged as a warning, not raised.

    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 for softmax
        X: train data of shape (N, D)
        T: labels of shape (N,) with an integer dtype
        W: weight of shape (M, D+1)
        log_loss_function: cross entropy log loss function
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag if test the analytical gradient with the numerical one.
        log_level: logging level
        callback: callback function to invoke at the each epoch end.
    Raises:
        AssertionError: an argument constraint is not met, or W was not
            updated by the gradient descent.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(
        T.dtype, np.integer) and T.ndim == 1 and T.shape[0] == N
    assert isinstance(
        X, np.ndarray) and X.dtype == TYPE_FLOAT and X.ndim == 2 and X.shape[
        0] == N and X.shape[1] == D
    assert isinstance(
        W, np.ndarray) and W.dtype == TYPE_FLOAT and W.ndim == 2 and W.shape[
        0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0

    # Only (sigmoid, M == 1) or (softmax, M >= 2) are accepted.
    assert ((log_loss_function == sigmoid_cross_entropy_log_loss and M == 1) or
            (log_loss_function == softmax_cross_entropy_log_loss and M >= 2))

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss = CrossEntropyLogLoss(name="loss",
                               num_nodes=M,
                               log_loss_function=log_loss_function,
                               log_level=log_level)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul = Matmul(name="matmul",
                    num_nodes=M,
                    W=W,
                    optimizer=optimizer,
                    log_level=log_level)
    matmul.objective = loss.function

    num_no_progress: int = 0    # how many time when loss L not decreased.
    loss.T = T
    # The history starts with the loss before any training.
    history: List[np.ndarray] = [loss.function(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # Calculate the matmul output Y=f(X), and get the loss L = objective(Y)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        L = loss.function(Y)

        if not (i % 50):
            print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # --------------------------------------------------------------------------------
        # Constraint: 1. Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # NOTE(review): the no-progress check only fires when (i % 20) == 1 and
        # the counter is reset on every other iteration -- confirm intended.
        # --------------------------------------------------------------------------------
        if L >= history[-1] and (i % 20) == 1:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s].",
                i, L, history[-1])
            if (num_no_progress := num_no_progress + 1) > 20:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                # break
        else:
            num_no_progress = 0

        history.append(L)

        # --------------------------------------------------------------------------------
        # Expected dL/dW.T = X.T @ dL/dY = X.T @ (P-T) / N, and dL/dX = dL/dY @ W
        # P = sigmoid(X) or softmax(X)
        # dL/dX = dL/dY * W is to use W BEFORE updating W.
        # --------------------------------------------------------------------------------
        P = None
        if log_loss_function == sigmoid_cross_entropy_log_loss:
            # P = sigmoid(np.matmul(X, W.T))
            P = sigmoid(np.matmul(matmul.X, matmul.W.T))
            P = P - T.reshape(-1, 1)    # T(N,) -> T(N,1) to align with P(N,1)
            assert P.shape == (
                N, 1), "P.shape is %s T.shape is %s" % (P.shape, T.shape)

        elif log_loss_function == softmax_cross_entropy_log_loss:
            # matmul.X.shape is (N, D+1), matmul.W.T.shape is (D+1, M)
            P = softmax(np.matmul(matmul.X, matmul.W.T))    # (N, M)
            P[np.arange(N), T] -= 1

        EDX = np.matmul(P / N, matmul.W)        # (N,M) @ (M, D+1) -> (N, D+1)
        EDX = EDX[::, 1:]                       # Hide the bias -> (N, D)
        EDW = np.matmul(matmul.X.T, P / N).T    # ((D+1,N) @ (N, M)).T -> (M, D+1)

        # --------------------------------------------------------------------------------
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dX.
        # --------------------------------------------------------------------------------
        before = copy.deepcopy(matmul.W)
        dY = loss.gradient(TYPE_FLOAT(1))
        dX = matmul.gradient(dY)

        # gradient descent and get the analytical gradients
        # dL/dX.shape = (N, D)
        # dL/dW.shape = (M, D+1)
        # presumably dS[0] is dL/dW, as it is compared with EDW and dL/dX is
        # prepended to dS below -- verify against Matmul.update()
        dS = matmul.update()
        dW = dS[0]

        # --------------------------------------------------------------------------------
        # Constraint 1. W in the matmul has been updated by the gradient descent.
        # --------------------------------------------------------------------------------
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        # Gradient mismatches are reported but do not fail the training.
        if not validate_against_expected_gradient(EDX, dX):
            Logger.warning("Expected dL/dX \n%s\nDiff\n%s", EDX, EDX - dX)
        if not validate_against_expected_gradient(EDW, dW):
            Logger.warning("Expected dL/dW \n%s\nDiff\n%s", EDW, EDW - dW)

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradients gn=[dL/dX, dL/dW]
            # dL/dX.shape = (N, D)
            # dL/dW.shape = (M, D+1)
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            # Only the first row of W is given to the callback.
            callback(W=matmul.W[0])
def validate_relu_neuron_training(
        matmul: Matmul,
        activation: ReLU,
        loss: CrossEntropyLogLoss,
        X: np.ndarray,
        T: np.ndarray,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        callback: Callable = None
):
    """Train a matmul -> ReLU -> log loss neuron and validate each round trip.

    The layer objectives are wired first. Every epoch then calculates the
    loss, validates the round trip against the expected dL/dA, runs the
    gradient descent on the matmul layer, and tracks consecutive epochs
    without loss improvement.

    Args:
        matmul: Matmul layer
        activation: ReLU layer
        loss: CrossEntropyLogLoss layer
        X: train data
        T: labels
        num_epochs: Number of epochs to run
        test_numerical_gradient: not used in this function
        callback: callback function to invoke at the each epoch end.
    Returns:
        List of the losses of the completed epochs
    """
    # Wire the objective of each layer, then the objective of the network.
    activation.objective = loss.function
    matmul.objective = compose(activation.function, loss.function)
    objective = compose(matmul.function, matmul.objective)

    stalled_count: int = 0    # consecutive epochs without loss improvement
    history: List[np.ndarray] = []
    loss.T = T

    for epoch in range(num_epochs):
        current_loss = objective(X)
        batch_size = X.shape[0]
        probabilities = softmax(relu(np.matmul(matmul.X, matmul.W.T)))
        expected_dA = expected_gradient_from_log_loss(
            P=probabilities, T=T, N=batch_size)

        # ********************************************************************************
        # Constraint: Expected gradients must match actual
        # ********************************************************************************
        validate_relu_neuron_round_trip(
            matmul=matmul,
            activation=activation,
            X=X,
            dA=expected_dA
        )

        # --------------------------------------------------------------------------------
        # Gradient descent. Keep a copy of W to detect a no-op update.
        # --------------------------------------------------------------------------------
        weights_before = copy.deepcopy(matmul.W)
        matmul.update()

        Logger.debug("W after is \n%s", matmul.W)
        if np.array_equal(weights_before, matmul.W):
            Logger.warning("W has not been updated")

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        stalled = epoch > 0 and current_loss >= history[-1]
        if not stalled:
            stalled_count = 0
        else:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                epoch, current_loss, history[-1], stalled_count + 1)
            stalled_count += 1

            # Decay the learning rate only after several successive stalls.
            if stalled_count > 5:
                matmul.lr = matmul.lr * 0.95

            # Give up after too many successive stalls.
            if stalled_count > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    stalled_count)
                break

        history.append(current_loss)
        if callback:
            callback(W=matmul.W)

    return history