Example #1
    def test_misc(self):  # tests of non-main features to improve coverage
        for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
            cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
            reset_seeds()

            optimizer_kw = {
                'total_iterations': 0,
                'decay': 1e-3,
                'amsgrad': optimizer_name == 'AdamW',
                'nesterov': optimizer_name == 'SGDW'
            }
            num_batches = 4
            batch_size, timesteps = 16, 8
            batch_shape = (batch_size, timesteps)
            embed_input_dim = 5
            total_iterations = 0

            self.model = self._make_model(batch_shape,
                                          total_iterations,
                                          embed_input_dim=embed_input_dim,
                                          dense_constraint=1,
                                          l2_reg=1e-4,
                                          bidirectional=False,
                                          sparse=True)
            optimizer = self._make_optimizer(optimizer_name, self.model,
                                             **optimizer_kw)
            self.model.compile(optimizer,
                               loss='sparse_categorical_crossentropy')
            X, Y = self._make_data(num_batches,
                                   *batch_shape,
                                   embed_input_dim=embed_input_dim,
                                   sparse=True)

            for batch_num in range(num_batches):
                self.model.train_on_batch(X[batch_num], Y[batch_num])

            self._test_save_load(self.model, X, optimizer_name, optimizer)

            # util test
            dc = {'lstm': 0, 'dense': 0}
            fill_dict_in_order(dc, [1e-4, 2e-4])
            AdamW(model=self.model, zero_penalties=True)
            AdamW(model=self.model, weight_decays={'a': 0})

            # cleanup
            del self.model, optimizer
            reset_seeds(reset_graph_with_backend=K)

            cprint("\n<< {} MISC TEST PASSED >>\n".format(optimizer_name),
                   'green')
        cprint("\n<< ALL MISC TESTS PASSED >>\n", 'green')
def test_main():  # Save/Load, Warm Restarts (w/ cosine annealing)
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        num_batches, num_epochs = 25, 4
        batch_size, timesteps, num_channels = 16, 8, 4
        batch_shape = (batch_size, timesteps, num_channels)
        total_iterations = num_batches  # due to warm restarts

        model = _make_model(batch_shape, l1_reg=1e-4, l2_reg=1e-4)
        optimizer = _make_optimizer(optimizer_name, model, total_iterations)
        model.compile(optimizer, loss='binary_crossentropy')
        assert _valid_weight_decays(model)

        if hasattr(model, '_make_train_function'):  # graph-mode
            model._make_train_function()  # else K.eval before train may fail

        X, Y = _make_data(num_batches, *batch_shape)
        eta_history = []  # for stop-introspection
        t_cur_history = []  # for stop-introspection

        # Explanation for the "manual option" when autorestart=False:
        # eta_t is first applied as-is, and only updated AFTER iteration;
        # setting t_cur does not immediately change eta_t.
        # Thus, t_cur must be reset 1 iteration BEFORE epoch ends
        # (t, e) = (t_cur_history[-1], eta_history[-1])
        # (t, e) = (24, 0)    -> RESET -> (-1, 0...)         [on     epoch end]
        # (t, e) = (23, 0...) -> RESET -> (-1, 0) -> (0, 1)  [before epoch end]
        for epoch in range(num_epochs):
            for batch_num in range(num_batches):
                t_cur_history += [K_eval(model.optimizer.t_cur, K)]
                eta_history += [K_eval(model.optimizer.eta_t, K)]
                model.train_on_batch(X[batch_num], Y[batch_num])
                # if batch_num == (num_batches - 2):  Manual Option
                #     K.set_value(model.optimizer.t_cur, -1)

        assert _valid_cosine_annealing(eta_history, total_iterations,
                                       num_epochs)
        assert model.optimizer.get_config()  # ensure value evaluation won't error
        _test_save_load(model, X, optimizer_name, optimizer)

        # cleanup
        del model, optimizer
        reset_seeds(reset_graph_with_backend=K)

        cprint("\n<< {} MAIN TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MAIN TESTS PASSED >>\n", 'green')
Example #3
    def test_main(self):  # Save/Load, Warm Restarts (w/ cosine annealing)
        for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
            cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
            reset_seeds()

            num_batches, num_epochs = 25, 4
            batch_size, timesteps, num_channels = 16, 8, 4
            batch_shape = (batch_size, timesteps, num_channels)
            total_iterations = num_batches  # due to warm restarts

            self.model = self._make_model(batch_shape,
                                          total_iterations,
                                          l1_reg=1e-4,
                                          l2_reg=1e-4)
            optimizer = self._make_optimizer(optimizer_name, self.model,
                                             total_iterations)
            self.model.compile(optimizer, loss='binary_crossentropy')
            self.assertTrue(self._valid_weight_decays(self.model))
            self.model._make_train_function()  # else K.eval before train may fail

            X, Y = self._make_data(num_batches, *batch_shape)
            self.eta_history = []  # for stop-introspection
            self.t_cur_history = []  # for stop-introspection

            for epoch in range(num_epochs):
                for batch_num in range(num_batches):
                    self.t_cur_history += [
                        K_eval(self.model.optimizer.t_cur, K)
                    ]
                    self.eta_history += [K_eval(self.model.optimizer.eta_t, K)]
                    self.model.train_on_batch(X[batch_num], Y[batch_num])
                    self.eta_history += [K_eval(self.model.optimizer.eta_t, K)]
                    self.eta_history.pop(-(1 + int(tf_eager)))
                K.set_value(self.model.optimizer.t_cur, 0)

            self.assertTrue(
                self._valid_cosine_annealing(self.eta_history,
                                             total_iterations, num_epochs))
            self._test_save_load(self.model, X, optimizer_name, optimizer)

            # cleanup
            del self.model, optimizer
            reset_seeds(reset_graph_with_backend=K)

            cprint("\n<< {} MAIN TEST PASSED >>\n".format(optimizer_name),
                   'green')
        cprint("\n<< ALL MAIN TESTS PASSED >>\n", 'green')
def test_misc():  # tests of non-main features to improve coverage
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        cprint("<< TESTING {} OPTIMIZER >>".format(optimizer_name), 'blue')
        reset_seeds()

        optimizer_kw = {
            'total_iterations': 0,
            'decay': 1e-3,
            'amsgrad': optimizer_name == 'AdamW',
            'nesterov': optimizer_name == 'SGDW'
        }
        num_batches = 4
        batch_size, timesteps = 16, 8
        batch_shape = (batch_size, timesteps)
        embed_input_dim = 5

        # arbitrarily select SGDW for coverage testing
        l1_reg = 1e-4 if optimizer_name == 'SGDW' else 0
        l2_reg = 1e-4 if optimizer_name != 'SGDW' else 0
        if optimizer_name == 'SGDW':
            optimizer_kw.update(
                dict(zero_penalties=False,
                     weight_decays={},
                     total_iterations=2,
                     momentum=0))

        model = _make_model(batch_shape,
                            embed_input_dim=embed_input_dim,
                            dense_constraint=1,
                            l1_reg=l1_reg,
                            l2_reg=l2_reg,
                            bidirectional=False,
                            sparse=True)
        optimizer = _make_optimizer(optimizer_name, model, **optimizer_kw)
        model.compile(optimizer, loss='sparse_categorical_crossentropy')
        X, Y = _make_data(num_batches,
                          *batch_shape,
                          embed_input_dim=embed_input_dim,
                          sparse=True)

        for batch_num in range(num_batches):
            model.train_on_batch(X[batch_num], Y[batch_num])

        _test_save_load(model, X, optimizer_name, optimizer)

        # util test
        dc = {'lstm': 0, 'dense': 0}
        fill_dict_in_order(dc, [1e-4, 2e-4])
        AdamW(model=model, zero_penalties=False, total_iterations=2)
        AdamW(model=model, weight_decays={'a': 0})

        opt = AdamW(weight_decays={model.layers[1].weights[0].name: (0, 0)},
                    total_iterations=2)
        model.compile(opt, loss='sparse_categorical_crossentropy')
        model.train_on_batch(X[0], Y[0])

        # cleanup
        del model, optimizer
        reset_seeds(reset_graph_with_backend=K)
        try:
            K_eval('x', K)  # for coverage
        except Exception:
            pass

        cprint("\n<< {} MISC TEST PASSED >>\n".format(optimizer_name), 'green')
    cprint("\n<< ALL MISC TESTS PASSED >>\n", 'green')
def _test_control(optimizer_name, amsgrad=False, nesterov=False, momentum=.9):
    optimizer_kw = dict(total_iterations=0,
                        decay=1e-3,
                        amsgrad=amsgrad,
                        nesterov=nesterov,
                        momentum=momentum,
                        control_mode=True)
    num_batches = 100
    batch_size, timesteps = 16, 32
    batch_shape = (batch_size, timesteps)
    embed_input_dim = 5

    model_kw = dict(batch_shape=batch_shape,
                    dense_constraint=1,
                    embed_input_dim=embed_input_dim,
                    l1_reg=0,
                    l2_reg=0,
                    bidirectional=False,
                    sparse=True)
    loss_name = 'sparse_categorical_crossentropy'
    reset_seeds(verbose=0)
    X, Y = _make_data(num_batches,
                      *batch_shape,
                      embed_input_dim=embed_input_dim,
                      sparse=True)

    reset_seeds(reset_graph_with_backend=K, verbose=0)
    model_custom = _make_model(**model_kw)
    optimizer_custom = _make_optimizer(optimizer_name, model_custom,
                                       **optimizer_kw)
    model_custom.compile(optimizer_custom, loss=loss_name)
    loss_custom = []  # for introspection
    t0 = time()
    for batch_num in range(num_batches):
        loss_custom += [
            model_custom.train_on_batch(X[batch_num], Y[batch_num])
        ]
    print("\nmodel_custom -- %s batches -- time: %.2f sec" %
          (num_batches, time() - t0))

    reset_seeds(reset_graph_with_backend=K, verbose=0)
    model_control = _make_model(**model_kw)
    optimizer_control = _make_optimizer(optimizer_name[:-1], model_control,
                                        **optimizer_kw)
    model_control.compile(optimizer_control, loss=loss_name)
    loss_control = []  # for introspection
    t0 = time()
    for batch_num in range(num_batches):
        loss_control += [
            model_control.train_on_batch(X[batch_num], Y[batch_num])
        ]
    print("model_control -- %s batches -- time: %.2f sec" %
          (num_batches, time() - t0))

    loss_diff = np.abs(np.array(loss_custom) - np.array(loss_control))
    print("%s max loss diff: %e" % (optimizer_name, np.max(loss_diff)))

    assert np.allclose(loss_custom, loss_control, rtol=0, atol=1e-3)
    # cleanup
    del model_custom, model_control
    del optimizer_custom, optimizer_control
    reset_seeds(reset_graph_with_backend=K, verbose=0)
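_test_control compares each custom optimizer against its built-in counterpart (optimizer_name[:-1] strips the trailing 'W', e.g. 'AdamW' -> 'Adam') and requires the per-batch losses to match within atol=1e-3. A hypothetical driver is sketched below; the flag choices mirror the optimizer_kw settings used in test_misc and are not part of the listing above.

# Hypothetical driver for the control tests; flag choices mirror test_misc.
def test_control():
    for optimizer_name in ['AdamW', 'NadamW', 'SGDW']:
        _test_control(optimizer_name,
                      amsgrad=(optimizer_name == 'AdamW'),
                      nesterov=(optimizer_name == 'SGDW'))
        cprint("\n<< {} CONTROL TEST PASSED >>\n".format(optimizer_name),
               'green')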