def test_040_objective_instantiation_to_fail():
    """
    Objective:
        Verify the layer class validates the initialization parameter constraints.
    Expected:
        Initialization detects parameter constraints not met and fails.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M: int = np.random.randint(1, NUM_MAX_NODES)

        # Constraint: Name is a string with length > 0.
        try:
            CrossEntropyLogLoss(
                name="",
                num_nodes=1,
                log_loss_function=sigmoid_cross_entropy_log_loss
            )
            raise RuntimeError(
                "CrossEntropyLogLoss initialization with invalid name must fail"
            )
        except AssertionError:
            pass

        # Constraint: num_nodes == 1
        try:
            CrossEntropyLogLoss(
                name="test_040_objective",
                num_nodes=0,
                log_loss_function=sigmoid_cross_entropy_log_loss
            )
            raise RuntimeError("CrossEntropyLogLoss(num_nodes<1) must fail.")
        except AssertionError:
            pass

        try:
            CrossEntropyLogLoss(
                name="test_040_objective",
                num_nodes=np.random.randint(2, NUM_MAX_NODES),
                log_loss_function=sigmoid_cross_entropy_log_loss
            )
            raise RuntimeError("CrossEntropyLogLoss(num_nodes>1) must fail.")
        except AssertionError:
            pass

        # Constraint: logging level is correct.
        try:
            CrossEntropyLogLoss(
                name="test_040_objective",
                num_nodes=M,
                log_loss_function=sigmoid_cross_entropy_log_loss,
                log_level=-1
            )
            raise RuntimeError(
                "CrossEntropyLogLoss initialization with invalid log level must fail"
            )
        except (AssertionError, KeyError):
            pass

    profiler.disable()
    profiler.print_stats(sort="cumtime")
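# The constraint checks above use the try / raise RuntimeError / except AssertionError
# pattern. For reference, a minimal sketch of the same name-constraint check written
# with pytest.raises (assuming pytest is available; not the pattern this module uses):
def demo_invalid_name_with_pytest_raises():
    import pytest
    with pytest.raises(AssertionError):
        CrossEntropyLogLoss(
            name="",    # invalid: name must be a non-empty string
            num_nodes=1,
            log_loss_function=sigmoid_cross_entropy_log_loss
        )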
def output(m, d):
    return {
        "matmul": Matmul.specification(
            name="matmul",
            num_nodes=m,
            num_features=d,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimizer.SGD.specification(
                lr=0.05,
                l2=1e-3
            )
        ),
        "loss": CrossEntropyLogLoss.specification(name="loss", num_nodes=m)
    }
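# Usage sketch for output(): the returned dictionary is meant to be embedded in a
# network specification under _COMPOSITE_LAYER_SPEC, in the same shape as the one
# assembled in test() below (an assumption about how SequentialNetwork.build
# consumes the specification; for illustration only):
def demo_output_specification_usage(m: int = 1, d: int = 2):
    spec = {
        _NAME: "classifier",
        _NUM_NODES: m,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: output(m, d)
    }
    return SequentialNetwork.build(specification=spec)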
def test():
    M = 1
    D = 2
    N = 100

    X, T, V = linear_separable(d=D, n=N)
    # Retained for optional plotting of the decision boundary.
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()

    sigmoid_classifier_specification = {
        _NAME: "sigmoid_classifier",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: {
            "matmul01": Matmul.specification(
                name="matmul",
                num_nodes=M,
                num_features=D,
                weights_initialization_scheme="he",
                weights_optimizer_specification=SGD.specification(
                    lr=TYPE_FLOAT(0.2),
                    l2=TYPE_FLOAT(1e-3)
                )
            ),
            "loss": CrossEntropyLogLoss.specification(
                name="loss",
                num_nodes=M,
                loss_function=sigmoid_cross_entropy_log_loss.__qualname__
            )
        }
    }
    logistic_classifier = SequentialNetwork.build(
        specification=sigmoid_classifier_specification,
    )

    for _ in range(50):
        logistic_classifier.train(X=X, T=T)

    prediction = logistic_classifier.predict(
        np.array([-1., -1.], dtype=TYPE_FLOAT))
    assert np.all(np.isin(prediction, [0, 1])), \
        "Prediction must be a 0/1 binary label but %s" % prediction
    print(prediction)
def train_matmul_bn_relu_classifier(
        N: int,
        D: int,
        M: int,
        X: np.ndarray,
        T: np.ndarray,
        W: np.ndarray,
        log_loss_function: Callable,
        optimizer: Optimizer,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        log_level: int = logging.ERROR,
        callback: Callable = None
):
    """Test case for binary classification with matmul + BN + ReLU + log loss.
    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 for softmax
        X: train data
        T: labels
        W: weight
        log_loss_function: cross entropy log loss function
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag to test the analytical gradient against
            the numerical one.
        log_level: logging level
        callback: callback function to invoke at each epoch end.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(T.dtype, np.integer) \
        and T.ndim == 1 and T.shape[0] == N
    assert isinstance(X, np.ndarray) and X.dtype == TYPE_FLOAT \
        and X.ndim == 2 and X.shape[0] == N and X.shape[1] == D
    assert isinstance(W, np.ndarray) and W.dtype == TYPE_FLOAT \
        and W.ndim == 2 and W.shape[0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0
    assert (log_loss_function == softmax_cross_entropy_log_loss and M >= 2)

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss: CrossEntropyLogLoss = CrossEntropyLogLoss(
        name="loss",
        num_nodes=M,
        log_loss_function=log_loss_function,
        log_level=log_level
    )

    # --------------------------------------------------------------------------------
    # Instantiate a ReLU layer
    # --------------------------------------------------------------------------------
    activation: ReLU = ReLU(name="relu", num_nodes=M, log_level=log_level)
    activation.objective = loss.function

    # --------------------------------------------------------------------------------
    # Instantiate a BatchNormalization layer
    # --------------------------------------------------------------------------------
    bn: BatchNormalization = BatchNormalization(
        name=name,
        num_nodes=M,
        log_level=logging.WARNING
    )
    bn.objective = compose(activation.function, activation.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul: Matmul = Matmul(
        name="matmul",
        num_nodes=M,
        W=W,
        optimizer=optimizer,
        log_level=log_level
    )
    matmul.objective = compose(bn.function, bn.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Standardization layer.
    # Need to apply the same mean and std to the non-training data set.
    # --------------------------------------------------------------------------------
    # norm = Standardization(
    #     name="standardization",
    #     num_nodes=M,
    #     log_level=log_level
    # )
    # X = np.copy(X)
    # X = norm.function(X)

    # Network objective function f: L=f(X)
    objective = compose(matmul.function, matmul.objective)
    prediction = compose(matmul.predict, bn.predict, activation.predict)

    num_no_progress: int = 0    # how many times the loss L has not decreased
    loss.T = T
    # pylint: disable=not-callable
    history: List[np.ndarray] = [matmul.objective(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # 1. Calculate the matmul output Y=matmul.f(X)
        # 2. Calculate the batch normalization output BN=bn.f(Y)
        # 3. Calculate the ReLU output A=activation.f(BN)
        # 4. Calculate the loss L = loss(A)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        BN = bn.function(Y)
        A = activation.function(BN)
        L = loss.function(A)

        # ********************************************************************************
        # Constraint: Network objective L must match layer-by-layer output
        # ********************************************************************************
        # pylint: disable=not-callable
        assert L == objective(X) and L.shape == (), \
            "Network objective L(X) %s must match layer-by-layer output %s." \
            % (objective(X), L)

        if not (i % 10):
            print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        if L >= history[-1] and i > 0:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                i, L, history[-1], num_no_progress + 1
            )
            # --------------------------------------------------------------------------------
            # Reducing the learning rate can make the situation worse.
            # When the lr was reduced every time L >= history, the (L >= history)
            # events became successive and eventually exceeded 50 successive
            # non-improvements, ending in failure.
            # Keeping the learning rate makes L >= history more frequent but with
            # at most ~3 successive events, and the training still keeps progressing.
            # --------------------------------------------------------------------------------
            num_no_progress += 1
            if num_no_progress > 5:
                matmul.lr = matmul.lr * TYPE_FLOAT(0.95)

            if num_no_progress > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress
                )
                break
        else:
            num_no_progress = 0

        history.append(L)

        # ================================================================================
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # ================================================================================
        before = copy.deepcopy(matmul.W)
        dA = loss.gradient(TYPE_FLOAT(1))   # dL/dA
        dBN = activation.gradient(dA)       # dL/dBN
        dY = bn.gradient(dBN)               # dL/dY
        dX = matmul.gradient(dY)            # dL/dX

        # gradient descent and get the analytical gradients
        bn.update()
        dS = matmul.update()                # [dL/dW]

        # ********************************************************************************
        # Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradient
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)     # prepend dL/dX

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            callback(W=matmul.W)

    return matmul.W, objective, prediction
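# compose() above chains callables left to right: compose(f, g)(x) == g(f(x)),
# which is how matmul.objective = compose(bn.function, bn.objective) wires the
# downstream layers into one objective. A minimal sketch of such a helper (the
# repository provides its own compose; this is for illustration only):
from functools import reduce


def _compose_sketch(*functions: Callable) -> Callable:
    """Return h with h(x) == functions[-1](... functions[0](x) ...)."""
    return reduce(lambda f, g: lambda x: g(f(x)), functions)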
def test_040_objective_instance_properties():
    """
    Objective:
        Verify the layer class validates that parameters have been initialized
        before being accessed.
    Expected:
        Access to uninitialized parameters is detected and fails.
    """
    msg = "Accessing uninitialized property of the layer must fail."
    name = random_string(np.random.randint(1, 10))
    for _ in range(NUM_MAX_TEST_TIMES):
        M: int = 1
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=1,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG
        )

        # --------------------------------------------------------------------------------
        # To pass
        # --------------------------------------------------------------------------------
        try:
            if not layer.name == name:
                raise RuntimeError("layer.name == name should be true")
        except AssertionError:
            raise RuntimeError(
                "Access to name should be allowed as already initialized.")

        try:
            if not layer.M == M:
                raise RuntimeError("layer.M == M should be true")
        except AssertionError:
            raise RuntimeError(
                "Access to M should be allowed as already initialized.")

        try:
            if not isinstance(layer.logger, logging.Logger):
                raise RuntimeError(
                    "isinstance(layer.logger, logging.Logger) should be true")
        except AssertionError:
            raise RuntimeError(
                "Access to logger should be allowed as already initialized.")

        # --------------------------------------------------------------------------------
        # To fail
        # --------------------------------------------------------------------------------
        try:
            print(layer.X)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            layer.X = int(1)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.N)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.dX)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.Y)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.P)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            layer._Y = int(1)
            print(layer.Y)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.dY)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            layer._dY = int(1)
            print(layer.dY)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.T)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.L)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(layer.J)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            layer.T = float(1)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            layer.function(int(1))
            raise RuntimeError("Invoke layer.function(int(1)) must fail.")
        except AssertionError:
            pass

        try:
            layer.function(TYPE_FLOAT(1.0))
            layer.gradient(int(1))
            raise RuntimeError("Invoke layer.gradient(int(1)) must fail.")
        except AssertionError:
            pass
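# The "to fail" accesses above rely on the layer guarding uninitialized state with
# assertions. A minimal sketch of such a guarded property (an assumption about the
# layer implementation, for illustration only):
class _UninitializedGuardSketch:
    def __init__(self):
        self._X = None

    @property
    def X(self) -> np.ndarray:
        # Accessing X before it is set raises AssertionError, which is what the
        # try/except blocks above expect.
        assert self._X is not None, "X is not initialized"
        return self._X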
def disabled_test_040_objective_methods_2d_ohe(caplog):
    """
    TODO: Disabled as need to redesign numerical_jacobian for 32 bit floating.

    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is np.sum(sigmoid_cross_entropy_log_loss) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().
    """
    caplog.set_level(logging.DEBUG)

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_040_objective_methods_2d_ohe"
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = 1  # node number is 1 for 0/1 binary classification.
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=M,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG
        )

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = np.random.randn(N, M).astype(TYPE_FLOAT)
        T = np.zeros_like(X, dtype=TYPE_LABEL)      # OHE labels.
        T[np.arange(N), np.random.randint(0, M, N)] = TYPE_LABEL(1)

        # log_loss function requires (X, T) in X(N, M), and T(N, M) in OHE label format.
        X, T = transform_X_T(X, T)
        layer.T = T
        Logger.debug("%s: X is \n%s\nT is \n%s", name, X, T)

        # --------------------------------------------------------------------------------
        # Expected analytical gradient EG = (dL/dX) = (A-T)/N
        # --------------------------------------------------------------------------------
        A = sigmoid(X)
        EG = ((A - T).astype(TYPE_FLOAT) / TYPE_FLOAT(N))

        # --------------------------------------------------------------------------------
        # Total loss Z = np.sum(J)/N
        # Expected loss EL = sum((1-T)X + np.log(1 + np.exp(-X))) / N
        # (J, P) = sigmoid_cross_entropy_log_loss(X, T) where J:shape(N,) is the
        # loss for each input and P is the activation sigmoid(X).
        # --------------------------------------------------------------------------------
        L = layer.function(X)
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        EL = np.array(
            np.sum((1 - T) * X + logarithm(1 + np.exp(-X))) / N,
            dtype=TYPE_FLOAT
        )

        # Constraint: A == P as they are sigmoid(X)
        assert np.all(np.abs(A - P) < ACTIVATION_DIFF_ACCEPTANCE_VALUE), \
            f"Need A==P==sigmoid(X) but A=\n{A}\n P=\n{P}\n(A-P)=\n{(A-P)}\n"

        # Constraint: Log loss layer output L == sum(J) from the log loss function
        Z = np.array(np.sum(J) / N, dtype=TYPE_FLOAT)
        assert np.array_equal(L, Z), \
            f"Need log loss layer output L == sum(J) but L=\n{L}\nZ=\n{Z}."

        # Constraint: L/loss is close to expected loss EL.
        assert np.all(np.abs(EL - L) < LOSS_DIFF_ACCEPTANCE_VALUE), \
            f"Need EL close to L but \nEL=\n{EL}\nL=\n{L}\n"

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical_jacobian(objective, X)
        # TODO: compare the diff to accommodate numerical errors.
        # --------------------------------------------------------------------------------
        GN = layer.gradient_numerical()     # [dL/dX] from the layer

        def objective(x):
            """Function to calculate the scalar loss L for cross entropy log loss"""
            j, p = sigmoid_cross_entropy_log_loss(x, T)
            return np.array(np.sum(j) / N, dtype=TYPE_FLOAT)

        EGN = numerical_jacobian(objective, X)      # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            "GN[0]==EGN expected but GN[0] is \n%s\n EGN is \n%s\n" % (GN[0], EGN)

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # constraint: Analytical gradient G: gradient() == (P-T)/N.
        dY = TYPE_FLOAT(1)
        G = layer.gradient(dY)
        assert np.all(np.abs(G - EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG}."

        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        assert \
            np.allclose(
                GN[0], G,
                atol=GRADIENT_DIFF_ACCEPTANCE_VALUE,
                rtol=GRADIENT_DIFF_ACCEPTANCE_RATIO
            ), \
            f"dX is \n{G}\nGN[0] is \n{GN[0]}\nRDiff is \n{G-GN[0]}.\n"

        # constraint: Gradient g of the log loss layer needs -1 < g < 1 because
        # abs(P-T) = abs(sigmoid(X)-T) cannot be > 1.
        assert np.all(np.abs(G) < 1), \
            f"Log loss layer gradient cannot be < -1 nor > 1 but\n{G}"
        assert np.all(np.abs(GN[0]) < (1 + GRADIENT_DIFF_ACCEPTANCE_RATIO)), \
            f"Log loss layer gradient cannot be < -1 nor > 1 but\n{GN[0]}"

    profiler.disable()
    profiler.print_stats(sort="cumtime")
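# Numeric sanity check of the closed form used for EL above: for p = sigmoid(x),
# -T*log(p) - (1-T)*log(1-p) == (1-T)*x + log(1 + exp(-x)). A self-contained
# sketch in float64 for clarity (the layer itself runs in TYPE_FLOAT):
def demo_sigmoid_log_loss_identity():
    x, t = np.float64(0.7), np.float64(1.0)
    p = np.float64(1.0) / (np.float64(1.0) + np.exp(-x))
    lhs = -t * np.log(p) - (1.0 - t) * np.log(1.0 - p)      # binary cross entropy
    rhs = (1.0 - t) * x + np.log(1.0 + np.exp(-x))          # closed form used for EL
    assert np.isclose(lhs, rhs)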
def disabled_test_040_objective_methods_1d_ohe():
    """
    TODO: Disabled as need to redesign numerical_jacobian for 32 bit floating.

    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is
           np.sum(cross_entropy_log_loss(sigmoid(X), T, f=logistic_log_loss)) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().

    For X.ndim > 0, the layer transforms X into 2D so as to use the numpy
    tuple-like indexing P[(0,3), (2,4)]. Hence, the shapes of GN and G are 2D.
    """
    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_040_objective_methods_1d_ohe"
    N = 1

    for _ in range(NUM_MAX_TEST_TIMES):
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=1,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG
        )

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = TYPE_FLOAT(
            np.random.uniform(low=-BOUNDARY_SIGMOID, high=BOUNDARY_SIGMOID))
        T = TYPE_LABEL(np.random.randint(0, 2))     # OHE labels.

        # log_loss function requires (X, T) in X(N, M), and T(N, M) in OHE label format.
        X, T = transform_X_T(X, T)
        layer.T = T

        # Expected analytical gradient dL/dX = (P-T)/N of shape (N,M)
        A = sigmoid(X)
        EG = ((A - T) / N).reshape(1, -1).astype(TYPE_FLOAT)
        Logger.debug(
            "%s: X is \n%s\nT is %s\nP is %s\nEG is %s\n", name, X, T, A, EG)

        # --------------------------------------------------------------------------------
        # constraint: L/loss == np.sum(J) / N.
        # J, P = sigmoid_cross_entropy_log_loss(X, T)
        # --------------------------------------------------------------------------------
        L = layer.function(X)   # L is shape ()
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        Z = np.array(np.sum(J), dtype=TYPE_FLOAT) / TYPE_FLOAT(N)
        assert np.array_equal(L, Z), f"LogLoss output should be {Z} but {L}."

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical Jacobian numerical_jacobian(O, X)
        # Use a dummy layer for the objective function because using the "layer"
        # updates the X, Y which can interfere with the independence of the layer.
        # --------------------------------------------------------------------------------
        GN = layer.gradient_numerical()     # [dL/dX] from the layer

        # --------------------------------------------------------------------------------
        # Cannot use CrossEntropyLogLoss.function() to simulate the objective function L
        # because it would apply transform_X_T multiple times.
        # Internally, transform_X_T(X, T) has already transformed T into an index label
        # in 1D with length 1 via "T = T.reshape(-1)".
        # Providing X in 1D into "dummy.function(x)" re-runs "transform_X_T(X, T)".
        # With (X.ndim == T.ndim == 1) as input, T must be an OHE label and
        # T.shape == P.shape must hold for OHE labels. However, T has already been
        # converted into the index format (transform_X_T applied multiple times), and
        # (T.shape=(1,1), X.shape=(1, > 1)) violates the (X.shape == T.shape) constraint.
        # --------------------------------------------------------------------------------
        # dummy = CrossEntropyLogLoss(
        #     name="dummy",
        #     num_nodes=M,
        #     log_level=logging.DEBUG
        # )
        # dummy.T = T
        # dummy.objective = objective
        # dummy.function(X)
        # --------------------------------------------------------------------------------
        def objective(x):
            """Calculate the scalar loss L for the sigmoid cross entropy log loss."""
            j, p = sigmoid_cross_entropy_log_loss(x, T)
            return np.array(np.sum(j) / N, dtype=TYPE_FLOAT)

        EGN = numerical_jacobian(objective, X).reshape(1, -1)   # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            f"Layer gradient_numerical GN \n{GN} \nneeds to be \n{EGN}."

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G: gradient() == (P-T)/N.
        # --------------------------------------------------------------------------------
        dY = TYPE_FLOAT(1)
        G = layer.gradient(dY)
        assert np.all(np.abs(G - EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG}."

        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        # --------------------------------------------------------------------------------
        assert \
            np.all(np.abs(G - GN[0]) <= GRADIENT_DIFF_ACCEPTANCE_VALUE) or \
            np.all(np.abs(G - GN[0]) <= np.abs(GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])), \
            "dX is \n%s\nGN is \n%s\nG-GN is \n%s\n Ratio * GN[0] is \n%s.\n" \
            % (G, GN[0], G - GN[0], GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])
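# numerical_jacobian(objective, X) above computes a finite-difference Jacobian of a
# scalar-valued objective. A minimal central-difference sketch in float64 (the
# repository provides its own numerical_jacobian; the 32-bit redesign mentioned in
# the TODOs presumably relates to the step size h vs TYPE_FLOAT precision):
def _numerical_jacobian_sketch(f: Callable, x: np.ndarray, h: float = 1e-5) -> np.ndarray:
    x = np.array(x, dtype=np.float64)       # work on a float64 copy
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        original = x[idx]
        x[idx] = original + h
        f_plus = f(x)                       # f(x + h) at this element
        x[idx] = original - h
        f_minus = f(x)                      # f(x - h) at this element
        x[idx] = original                   # restore
        grad[idx] = (f_plus - f_minus) / (2.0 * h)
        it.iternext()
    return grad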
def test_040_objective_instantiation():
    """
    Objective:
        Verify the initialized layer instance provides its properties.
    Expected:
        * name, num_nodes, M, log_level are the same as initialized.
        * X, T, dY, objective return what is set.
        * N, M properties are provided after X is set.
        * Y, P, L properties are provided after function(X).
        * gradient() returns (P-T)/N.
        * objective() of the output/last layer is an identity function.
    """
    name = "test_040_objective_instantiation"
    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = 1
        # For the sigmoid log loss layer, the number of features D in X is the
        # same as the node number M.
        D: int = M
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=M,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG
        )

        # --------------------------------------------------------------------------------
        # Properties
        # --------------------------------------------------------------------------------
        assert layer.name == name
        assert layer.num_nodes == layer.M == M

        layer._D = D
        assert layer.D == D

        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        layer.X = X
        assert np.array_equal(layer.X, X)
        assert layer.N == N == X.shape[0]
        # For the sigmoid log loss layer, M == the number of features in X.
        assert layer.M == X.shape[1]

        layer._dX = X
        assert np.array_equal(layer.dX, X)

        T = np.random.randint(0, M, N).astype(TYPE_LABEL)
        layer.T = T
        assert np.array_equal(layer.T, T)

        # layer.function() gives the total loss L in shape ().
        # log_loss function requires (X, T) in X(N, M), and T(N, M) in OHE label format.
        X, T = transform_X_T(X, T)
        L = layer.function(X)
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        assert \
            L.shape == () \
            and np.allclose(L, (np.sum(J) / N).astype(TYPE_FLOAT)) \
            and L == layer.Y, \
            "After setting T, layer.function(X) generates the total loss L but %s" % L

        # layer.function(X) sets layer.P to sigmoid_cross_entropy_log_loss(X, T);
        # P is nearly equal to sigmoid(X).
        assert \
            np.array_equal(layer.P, P) and \
            np.all(np.abs(layer.P - sigmoid(X)) < LOSS_DIFF_ACCEPTANCE_VALUE), \
            "layer.function(X) needs to set P as sigmoid_cross_entropy_log_loss(X, T) " \
            "which is close to sigmoid(X) but layer.P=\n%s\nP=\n%s\nsigmoid(X)=%s" \
            % (layer.P, P, sigmoid(X))

        # gradient of the sigmoid cross entropy log loss layer is (P-T)/N
        G = layer.gradient()
        assert \
            np.all(np.abs(G - ((P - T) / N)) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            "Gradient G needs (P-T)/N but G=\n%s\n(P-T)/N=\n%s\n" % (G, (P - T) / N)

        layer.logger.debug("This is a pytest")

        # pylint: disable=not-callable
        assert \
            layer.objective(np.array(1.0, dtype=TYPE_FLOAT)) \
            == np.array(1.0, dtype=TYPE_FLOAT), \
            "Objective function of the output/last layer is an identity function."
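# The (P-T)/N gradient asserted above follows from the per-sample derivative
# d/dx [(1-t)x + log(1 + exp(-x))] = (1-t) - (1 - sigmoid(x)) = sigmoid(x) - t,
# averaged over the batch. A quick float64 check of that derivative (a sketch):
def demo_gradient_is_p_minus_t():
    x, t, h = np.float64(0.3), np.float64(1.0), np.float64(1e-6)

    def loss_fn(v):
        return (1.0 - t) * v + np.log(1.0 + np.exp(-v))

    numerical = (loss_fn(x + h) - loss_fn(x - h)) / (2.0 * h)   # central difference
    p = 1.0 / (1.0 + np.exp(-x))                                # sigmoid(x)
    assert np.isclose(numerical, p - t)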
def multilayer_network_specification_bn_to_fail(D, M01, M02, M):
    sequential_layer_specification_bn_to_fail = {
        "matmul01": layer.Matmul.specification(
            name="matmul01",
            num_nodes=M01,
            num_features=D,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            )
        ),
        "bn01": layer.BatchNormalization.specification(
            name="bn01",
            num_nodes=M01,
            gamma_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            ),
            beta_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3),
            ),
            momentum=TYPE_FLOAT(0.9)
        ),
        "relu01": layer.ReLU.specification(
            name="relu01",
            num_nodes=M01,
        ),
        "matmul02": layer.Matmul.specification(
            name="matmul01",
            num_nodes=M02,
            num_features=M01,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            )
        ),
        "bn02": layer.BatchNormalization.specification(
            name="bn02",
            num_nodes=M02,
            gamma_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            ),
            beta_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3),
            ),
            momentum=TYPE_FLOAT(0.9)
        ),
        "relu02": layer.ReLU.specification(
            name="relu02",
            num_nodes=M02,
        ),
        "matmul03": layer.Matmul.specification(
            name="matmul03",
            num_nodes=M,
            num_features=M02,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            )
        ),
        "bn03": layer.BatchNormalization.specification(
            name="bn03",
            num_nodes=M,
            gamma_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3)
            ),
            beta_optimizer_specification=optimiser.SGD.specification(
                lr=TYPE_FLOAT(0.05),
                l2=TYPE_FLOAT(1e-3),
            ),
            momentum=TYPE_FLOAT(0.9)
        ),
        "loss": CrossEntropyLogLoss.specification(
            name="loss001",
            num_nodes=M
        )
    }
    return {
        _NAME: "two_layer_classifier_with_batch_normalization",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: sequential_layer_specification_bn_to_fail
    }
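# Usage sketch: the "_to_fail" suffix indicates this specification is expected to
# be rejected when the network is built (an assumption about where the validation
# happens; the actual failing test lives elsewhere in this module):
def demo_bn_specification_to_fail(D: int = 2, M01: int = 4, M02: int = 4, M: int = 3):
    spec = multilayer_network_specification_bn_to_fail(D, M01, M02, M)
    try:
        SequentialNetwork.build(specification=spec)
        raise RuntimeError("Building the invalid specification must fail.")
    except AssertionError:
        pass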
def train_binary_classifier(
        N: int,
        D: int,
        M: int,
        X: np.ndarray,
        T: np.ndarray,
        W: np.ndarray,
        log_loss_function: Callable,
        optimizer: Optimizer,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        log_level: int = logging.ERROR,
        callback: Callable = None
):
    """Test case for binary classification with matmul + log loss.
    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 for softmax
        X: train data
        T: labels
        W: weight
        log_loss_function: cross entropy log loss function
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag to test the analytical gradient against
            the numerical one.
        log_level: logging level
        callback: callback function to invoke at each epoch end.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(T.dtype, np.integer) \
        and T.ndim == 1 and T.shape[0] == N
    assert isinstance(X, np.ndarray) and X.dtype == TYPE_FLOAT \
        and X.ndim == 2 and X.shape[0] == N and X.shape[1] == D
    assert isinstance(W, np.ndarray) and W.dtype == TYPE_FLOAT \
        and W.ndim == 2 and W.shape[0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0
    assert (
        (log_loss_function == sigmoid_cross_entropy_log_loss and M == 1) or
        (log_loss_function == softmax_cross_entropy_log_loss and M >= 2)
    )

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss = CrossEntropyLogLoss(
        name="loss",
        num_nodes=M,
        log_loss_function=log_loss_function,
        log_level=log_level
    )

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul = Matmul(
        name="matmul",
        num_nodes=M,
        W=W,
        optimizer=optimizer,
        log_level=log_level
    )
    matmul.objective = loss.function

    num_no_progress: int = 0    # how many times the loss L has not decreased
    loss.T = T
    history: List[np.ndarray] = [loss.function(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # Calculate the matmul output Y=f(X), and get the loss L = objective(Y)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        L = loss.function(Y)

        if not (i % 50):
            print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # --------------------------------------------------------------------------------
        # Constraint 1. Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # --------------------------------------------------------------------------------
        if L >= history[-1] and (i % 20) == 1:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s].",
                i, L, history[-1]
            )
            if (num_no_progress := num_no_progress + 1) > 20:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress
                )
                # break
        else:
            num_no_progress = 0

        history.append(L)

        # --------------------------------------------------------------------------------
        # Expected dL/dW.T = X.T @ dL/dY = X.T @ (P-T) / N, and dL/dX = dL/dY @ W
        # P = sigmoid(X) or softmax(X)
        # dL/dX = dL/dY @ W uses W BEFORE it is updated.
        # --------------------------------------------------------------------------------
        P = None
        if log_loss_function == sigmoid_cross_entropy_log_loss:
            # P = sigmoid(np.matmul(X, W.T))
            P = sigmoid(np.matmul(matmul.X, matmul.W.T))
            P = P - T.reshape(-1, 1)    # T(N,) -> T(N,1) to align with P(N,1)
            assert P.shape == (N, 1), \
                "P.shape is %s T.shape is %s" % (P.shape, T.shape)

        elif log_loss_function == softmax_cross_entropy_log_loss:
            # matmul.X.shape is (N, D+1), matmul.W.T.shape is (D+1, M)
            P = softmax(np.matmul(matmul.X, matmul.W.T))    # (N, M)
            P[np.arange(N), T] -= 1

        EDX = np.matmul(P / N, matmul.W)        # (N,M) @ (M, D+1) -> (N, D+1)
        EDX = EDX[::, 1:]                       # Drop the bias -> (N, D)
        EDW = np.matmul(matmul.X.T, P / N).T    # ((D+1,N) @ (N, M)).T -> (M, D+1)

        # --------------------------------------------------------------------------------
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        before = copy.deepcopy(matmul.W)
        dY = loss.gradient(TYPE_FLOAT(1))
        dX = matmul.gradient(dY)

        # gradient descent and get the analytical gradients dS=[dL/dW]
        # dL/dX.shape = (N, D)
        # dL/dW.shape = (M, D+1)
        dS = matmul.update()
        dW = dS[0]

        # --------------------------------------------------------------------------------
        # Constraint 1. W in the matmul has been updated by the gradient descent.
        # --------------------------------------------------------------------------------
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        if not validate_against_expected_gradient(EDX, dX):
            Logger.warning("Expected dL/dX \n%s\nDiff\n%s", EDX, EDX - dX)
        if not validate_against_expected_gradient(EDW, dW):
            Logger.warning("Expected dL/dW \n%s\nDiff\n%s", EDW, EDW - dW)

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradients gn=[dL/dX, dL/dW]
            # dL/dX.shape = (N, D)
            # dL/dW.shape = (M, D+1)
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            callback(W=matmul.W[0])
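# Shape check for the closed-form gradients above, with the bias column folded into
# W so that X is (N, D+1) and W is (M, D+1). A self-contained float64 sketch:
def demo_expected_gradient_shapes(n: int = 4, d: int = 3, m: int = 2):
    rng = np.random.default_rng(0)
    x = rng.standard_normal((n, d + 1))     # input with the bias column
    w = rng.standard_normal((m, d + 1))     # weight
    p = rng.random((n, m))                  # stand-in for the (P - T) term
    edw = np.matmul(x.T, p / n).T           # ((D+1,N) @ (N,M)).T -> (M, D+1)
    edx = np.matmul(p / n, w)[:, 1:]        # (N, D) after dropping the bias column
    assert edw.shape == (m, d + 1) and edx.shape == (n, d)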
def validate_relu_neuron_training(
        matmul: Matmul,
        activation: ReLU,
        loss: CrossEntropyLogLoss,
        X: np.ndarray,
        T: np.ndarray,
        num_epochs: int = 100,
        test_numerical_gradient: bool = False,
        callback: Callable = None
):
    activation.objective = loss.function
    matmul.objective = compose(activation.function, loss.function)
    objective = compose(matmul.function, matmul.objective)

    num_no_progress: int = 0    # how many times the loss L has not decreased
    history: List[np.ndarray] = []

    loss.T = T
    for i in range(num_epochs):
        L = objective(X)
        N = X.shape[0]
        P = softmax(relu(np.matmul(matmul.X, matmul.W.T)))
        EDA = expected_gradient_from_log_loss(P=P, T=T, N=N)

        # ********************************************************************************
        # Constraint: Expected gradients must match actual
        # ********************************************************************************
        validate_relu_neuron_round_trip(
            matmul=matmul,
            activation=activation,
            X=X,
            dA=EDA
        )

        # --------------------------------------------------------------------------------
        # gradient descent and get the analytical dL/dX, dL/dW
        # --------------------------------------------------------------------------------
        previous_W = copy.deepcopy(matmul.W)
        matmul.update()     # dL/dX, dL/dW

        # ********************************************************************************
        # Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        if np.array_equal(previous_W, matmul.W):
            Logger.warning("W has not been updated")

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        if i > 0 and L >= history[-1]:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                i, L, history[-1], num_no_progress + 1
            )
            # --------------------------------------------------------------------------------
            # Reducing the learning rate can make the situation worse.
            # When the lr was reduced every time L >= history, the (L >= history)
            # events became successive and eventually exceeded 50 successive
            # non-improvements, ending in failure.
            # Keeping the learning rate makes L >= history more frequent but with
            # at most ~3 successive events, and the training still keeps progressing.
            # --------------------------------------------------------------------------------
            num_no_progress += 1
            if num_no_progress > 5:
                matmul.lr = matmul.lr * TYPE_FLOAT(0.95)

            if num_no_progress > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress
                )
                break
        else:
            num_no_progress = 0

        history.append(L)

        if callback:
            callback(W=matmul.W)

    return history
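# expected_gradient_from_log_loss(P=P, T=T, N=N) above computes (P - OHE(T)) / N
# for index labels T, matching the closed form used in train_binary_classifier.
# A minimal sketch of such a helper (the repository provides its own):
def _expected_gradient_from_log_loss_sketch(P: np.ndarray, T: np.ndarray, N: int) -> np.ndarray:
    G = np.copy(P)
    G[np.arange(N), T] -= 1.0   # subtract the one-hot label in place
    return G / N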