Example #1
    def test_lambda_lr(test_case):
        optimizer = flow.optim.SGD(
            [
                {
                    "params": [Parameter(flow.Tensor([1.0]))]
                },
                {
                    "params": [Parameter(flow.Tensor([1.0]))]
                },
            ],
            lr=TestLrScheduler.base_lr,
        )
        lambdas = [lambda step: step // 30, lambda step: 0.95 * step]

        def lambda_lr_step(base_lrs, current_step):
            return [
                base_lr * lmbda(current_step)
                for (base_lr, lmbda) in zip(base_lrs, lambdas)
            ]

        lambda_lr = flow.optim.lr_scheduler.LambdaLR(optimizer,
                                                     lr_lambda=lambdas)
        for i in range(1, 21):
            lambda_lr.step()
            new_lrs = lambda_lr_step(lambda_lr.base_lrs, i)
            for (lr1, lr2) in zip(lambda_lr.get_last_lr(), new_lrs):
                test_case.assertAlmostEqual(lr1, lr2, places=5)
Example #2
def rebuild_tensor(cls, tensor_data, requires_grad):
    t = flow.tensor(tensor_data)
    if cls == Parameter:
        # we have to pass requires_grad into the constructor rather than set it
        # as an attribute later, because the constructor enforces that integer
        # tensors have requires_grad=False (they raise an error otherwise)
        t = Parameter(t, requires_grad=requires_grad)
    else:
        t.requires_grad = requires_grad
    return t
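
A minimal usage sketch for rebuild_tensor (the tensor values and the flow.Tensor class argument are illustrative, not taken from the original test suite):

p = rebuild_tensor(Parameter, flow.Tensor([1.0, 2.0]), requires_grad=True)
t = rebuild_tensor(flow.Tensor, flow.tensor([1, 2, 3]), requires_grad=False)
assert isinstance(p, Parameter) and p.requires_grad
assert not t.requires_grad  # integer data stays a plain tensor without grad
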
Example #3
    def test_cosine_annealing_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def cosine_annealing_lr_step(base_lr, current_step, last_lr, T_max,
                                     eta_min):
            if (current_step - 1 - T_max) % (2 * T_max) == 0:
                return (last_lr + (TestLrScheduler.base_lr - eta_min) *
                        (1 - math.cos(math.pi / T_max)) / 2)
            else:
                return (1 + math.cos(math.pi * current_step / T_max)) / (
                    1 + math.cos(math.pi * (current_step - 1) / T_max)) * (
                        last_lr - eta_min) + eta_min

        T_max = 20
        eta_min = 0.5
        cosine_annealing_lr = flow.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=T_max, eta_min=eta_min)
        numpy_last_lr = TestLrScheduler.base_lr
        for i in range(1, 101):
            cosine_annealing_lr.step()
            numpy_last_lr = cosine_annealing_lr_step(TestLrScheduler.base_lr,
                                                     i, numpy_last_lr, T_max,
                                                     eta_min)
            test_case.assertAlmostEqual(cosine_annealing_lr.get_last_lr()[0],
                                        numpy_last_lr,
                                        places=4)
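
For reference, the helper above encodes the cosine-annealing chained recurrence (with the base learning rate playing the role of \eta_{\max}):

\[
\eta_t = \eta_{\min} + (\eta_{t-1} - \eta_{\min})\,
        \frac{1 + \cos(\pi t / T_{\max})}{1 + \cos(\pi (t-1) / T_{\max})},
\qquad
\eta_t = \eta_{t-1} + (\eta_{\max} - \eta_{\min})\,\frac{1 - \cos(\pi / T_{\max})}{2}
\quad\text{whenever } (t - 1 - T_{\max}) \bmod 2T_{\max} = 0 .
\]
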
Example #4
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        sgd = flow.optim.SGD([{
            "params": [x],
            "lr": learning_rate,
            "momentum": momentum,
            "weight_decay": weight_decay,
        }])

        def train_one_iter(grad):
            grad_tensor = flow.tensor(
                grad,
                dtype=flow.float32,
                requires_grad=False,
                device=flow.device(device),
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            sgd.step()
            sgd.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
            # test state_dict/load_state_dict
            if i == reload_state_step:
                state_dict = sgd.state_dict()
                sgd = flow.optim.SGD([x])
                if save_load_by_pickle:
                    with tempfile.TemporaryDirectory() as save_dir:
                        flow.save(state_dict, save_dir)
                        state_dict = flow.load(save_dir)
                sgd.load_state_dict(state_dict)
        return x
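
For reference, the update this loop exercises is the textbook SGD-with-momentum rule in the PyTorch-style convention (a sketch for orientation, not a statement of OneFlow's exact kernel):

\[
g_t \leftarrow g_t + \lambda\,\theta_{t-1},\qquad
v_t = \mu\,v_{t-1} + g_t,\qquad
\theta_t = \theta_{t-1} - \mathrm{lr}\cdot v_t,
\]

where \mu is the momentum and \lambda the weight decay.
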
Example #5
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        adagrad = flow.optim.Adagrad(
            [
                {
                    "params": [x],
                    "lr": learning_rate,
                    "eps": eps,
                    "weight_decay": weight_decay,
                }
            ],
            lr_decay=lr_decay,
            initial_accumulator_value=initial_accumulator_value,
        )

        def train_one_iter(grad):
            grad_tensor = flow.tensor(
                grad, requires_grad=False, device=flow.device(device)
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            adagrad.step()
            adagrad.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
            if i == reload_state_step:
                state_dict = adagrad.state_dict()
                adagrad = flow.optim.Adagrad([x])
                if save_load_by_pickle:
                    with tempfile.TemporaryDirectory() as save_dir:
                        flow.save(state_dict, save_dir)
                        state_dict = flow.load(save_dir)
                adagrad.load_state_dict(state_dict)
        return x
Example #6
    def test_cosine_decay_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def cosine_decay_lr_step(base_lr, current_step, decay_steps, alpha):
            if current_step < decay_steps:
                cos_decay = 0.5 * (
                    1 + math.cos(math.pi * current_step / decay_steps))
                decay_factor = (1 - alpha) * cos_decay + alpha
                return base_lr * decay_factor
            else:
                return base_lr * alpha

        alpha = 0.5
        decay_steps = 10
        cosine_decay_lr = flow.optim.lr_scheduler.CosineDecayLR(
            optimizer, decay_steps=decay_steps, alpha=alpha)
        for i in range(1, 21):
            cosine_decay_lr.step()
            new_lr = cosine_decay_lr_step(TestLrScheduler.base_lr, i,
                                          decay_steps, alpha)
            test_case.assertAlmostEqual(cosine_decay_lr.get_last_lr()[0],
                                        new_lr,
                                        places=4)
Example #7
    def _apply(self, fn, applied_dict=None):
        # A dict to store tensors that have already been applied.
        # There is no need to apply fn multiple times to the same tensor.
        if applied_dict is None:
            applied_dict = dict()

        for module in self.children():
            module._apply(fn, applied_dict)

        def can_use_assign_copy(tensor, tensor_applied):
            return tensor.is_local == tensor_applied.is_local

        for (key, param) in self._parameters.items():
            if param is None:
                continue

            need_apply = False
            if param not in applied_dict:
                need_apply = True
                assert isinstance(param, Parameter)
                assert param.is_leaf
                with flow.no_grad():
                    param_applied = fn(param)
                param_applied.requires_grad = param.requires_grad

                if param.grad is not None:
                    assert param.grad.is_leaf
                    with flow.no_grad():
                        grad_applied = fn(param.grad)
                    grad_applied.requires_grad = param.grad.requires_grad
                    param_applied.grad = grad_applied
            else:
                param_applied = applied_dict[param]

            if can_use_assign_copy(param_applied, param):
                if need_apply:
                    self._parameters[key].data = param_applied
                    applied_dict[param] = param_applied
                else:
                    # The parameter's data was already assigned the first time it was applied.
                    pass
            else:
                if need_apply:
                    new_param = Parameter(param_applied, param.requires_grad)
                    self._parameters[key] = new_param
                    applied_dict[param] = new_param
                else:
                    self._parameters[key] = applied_dict[param]

        for (key, buf) in self._buffers.items():
            if buf is not None:
                if buf not in applied_dict:
                    buf_applied = fn(buf)
                    self._buffers[key] = buf_applied
                    applied_dict[buf] = buf_applied
                else:
                    self._buffers[key] = applied_dict[buf]
        return self
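
A sketch of what _apply guarantees for shared parameters (the Shared module below is hypothetical, and Module.float() is assumed to route through _apply as in the PyTorch-style API):

import oneflow as flow
from oneflow.nn import Module, Parameter

class Shared(Module):
    def __init__(self, p):
        super().__init__()
        self.w = p  # assigning a Parameter attribute registers it in _parameters

shared = Parameter(flow.Tensor([1.0, 2.0]))
root = Module()
root.a = Shared(shared)
root.b = Shared(shared)

root.float()  # recursively invokes _apply(fn) on both children
# applied_dict ensures fn ran only once for the shared tensor, so sharing survives
assert root.a.w is root.b.w
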
Example #8
def rebuild_shm_parameter(shm, shape, dtype, requires_grad):
    def delete_shm():
        shm.close()
        shm.unlink()

    arr = np.ndarray(shape, dtype=dtype, buffer=shm.buf)
    t = flow.from_numpy(arr)
    t._register_storage_delete_hook(delete_shm)
    return Parameter(t, requires_grad=requires_grad)
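
A usage sketch for the shared-memory path (the SharedMemory segment is created locally here purely for illustration; in practice it arrives from another process):

import numpy as np
from multiprocessing import shared_memory

data = np.arange(6, dtype=np.float32)
shm = shared_memory.SharedMemory(create=True, size=data.nbytes)
np.ndarray(data.shape, dtype=data.dtype, buffer=shm.buf)[:] = data

param = rebuild_shm_parameter(shm, data.shape, data.dtype, requires_grad=True)
# param is a Parameter backed by the shared buffer; the registered hook closes
# and unlinks the segment once the underlying storage is released.
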
Example #9
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        param_list = list()
        param_list.append(x)
        rmsprop = flow.optim.RMSprop([{
            "params": param_list,
            "lr": learning_rate,
            "alpha": alpha,
            "eps": eps,
            "weight_decay": weight_decay,
            "momentum": momentum,
            "centered": centered,
            "clip_grad_max_norm": clip_grad_max_norm,
            "clip_grad_norm_type": clip_grad_norm_type,
        }])

        def train_one_iter(grad):
            grad_tensor = flow.tensor(
                grad,
                dtype=flow.float32,
                requires_grad=False,
                device=flow.device(device),
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            rmsprop.clip_grad()
            rmsprop.step()
            rmsprop.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
            if i == reload_state_step:
                state_dict = rmsprop.state_dict()
                rmsprop = flow.optim.RMSprop([x])
                if save_load_by_pickle:
                    with tempfile.TemporaryDirectory() as save_dir:
                        flow.save(state_dict, save_dir)
                        state_dict = flow.load(save_dir)
                rmsprop.load_state_dict(state_dict)
        return x
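
The clip_grad_max_norm / clip_grad_norm_type options used above refer to clipping by global gradient norm. A NumPy reference of the standard formulation (a sketch for comparison, not OneFlow's internal implementation; the inf-norm case is omitted):

import numpy as np

def clip_grad_norm_ref(grads, max_norm, norm_type):
    # total norm over all gradients, then one shared rescale factor
    total_norm = sum(np.sum(np.abs(g) ** norm_type) for g in grads) ** (1.0 / norm_type)
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1.0:
        grads = [g * clip_coef for g in grads]
    return total_norm, grads
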
Example #10
    def test_polynomial_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def polynomial_lr_step(base_lr, end_lr, step, decay_steps, power,
                               cycle):
            if cycle:
                if step == 0:
                    step = 1
                decay_steps = decay_steps * math.ceil(step / decay_steps)
            step = min(step, decay_steps)
            return (base_lr - end_lr) * (1 -
                                         step / decay_steps)**power + end_lr

        decay_steps = 100
        end_learning_rate = 1e-5
        power = 2
        cycle = True
        poly_decay_lr = flow.optim.lr_scheduler.PolynomialLR(
            optimizer, decay_steps, end_learning_rate, power, cycle)
        # step(0) will be invoked in LrScheduler.__init__
        new_lr = polynomial_lr_step(TestLrScheduler.base_lr, end_learning_rate,
                                    0, decay_steps, power, cycle)
        test_case.assertAlmostEqual(poly_decay_lr.get_last_lr()[0],
                                    new_lr,
                                    places=4)
        for i in range(1, 21):
            poly_decay_lr.step()
            new_lr = polynomial_lr_step(TestLrScheduler.base_lr,
                                        end_learning_rate, i, decay_steps,
                                        power, cycle)
            test_case.assertAlmostEqual(poly_decay_lr.get_last_lr()[0],
                                        new_lr,
                                        places=4)

        cycle = False
        poly_decay_lr = flow.optim.lr_scheduler.PolynomialLR(
            optimizer, decay_steps, end_learning_rate, power, cycle)
        for i in range(1, 21):
            poly_decay_lr.step()
            new_lr = polynomial_lr_step(TestLrScheduler.base_lr,
                                        end_learning_rate, i, decay_steps,
                                        power, cycle)
            test_case.assertAlmostEqual(poly_decay_lr.get_last_lr()[0],
                                        new_lr,
                                        places=4)
Example #11
    def test_exponential_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def exponential_lr_step(base_lr, current_step, gamma):
            return base_lr * gamma**current_step

        gamma = 0.1
        exponential_lr = flow.optim.lr_scheduler.ExponentialLR(optimizer,
                                                               gamma=gamma)
        for i in range(1, 21):
            exponential_lr.step()
            new_lr = exponential_lr_step(TestLrScheduler.base_lr, i, gamma)
            test_case.assertAlmostEqual(exponential_lr.get_last_lr()[0],
                                        new_lr,
                                        places=5)
Example #12
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        adam = flow.optim.Adam(
            [
                {
                    "params": [x],
                    "lr": learning_rate,
                    "betas": betas,
                    "eps": eps,
                    "weight_decay": weight_decay,
                    "clip_grad_max_norm": clip_grad_max_norm,
                    "clip_grad_norm_type": clip_grad_norm_type,
                }
            ],
            do_bias_correction=do_bias_correction,
            amsgrad=amsgrad,
        )

        def train_one_iter(grad):
            grad_tensor = flow.tensor(
                grad,
                dtype=flow.float32,
                requires_grad=False,
                device=flow.device(device),
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            adam.clip_grad()
            adam.step()
            adam.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
            if i == reload_state_step:
                state_dict = adam.state_dict()
                adam = flow.optim.Adam(
                    [{"params": [x],}], do_bias_correction=do_bias_correction,
                )
                if save_load_by_pickle:
                    with tempfile.TemporaryDirectory() as save_dir:
                        flow.save(state_dict, save_dir)
                        state_dict = flow.load(save_dir)
                adam.load_state_dict(state_dict)
        return x
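
For reference, the update this loop exercises is the textbook Adam rule (a sketch for orientation, not a statement of OneFlow's exact kernel; weight decay is assumed to enter as an L2 term added to the gradient):

\[
m_t = \beta_1 m_{t-1} + (1-\beta_1) g_t,\qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2) g_t^2,
\]
\[
\hat m_t = \frac{m_t}{1-\beta_1^t},\quad
\hat v_t = \frac{v_t}{1-\beta_2^t}\ \text{(when bias correction is enabled)},\qquad
\theta_t = \theta_{t-1} - \mathrm{lr}\,\frac{\hat m_t}{\sqrt{\hat v_t} + \epsilon},
\]

with AMSGrad replacing \hat v_t by the running maximum \max(\hat v^{\max}_{t-1}, \hat v_t).
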
Example #13
    def test_step_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def step_lr_step(base_lr, current_step, step_size, gamma):
            return base_lr * gamma**(current_step // step_size)

        gamma = 0.1
        step_size = 5
        step_lr = flow.optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=step_size,
                                                 gamma=gamma)
        for i in range(1, 21):
            step_lr.step()
            new_lr = step_lr_step(TestLrScheduler.base_lr, i, step_size, gamma)
            test_case.assertAlmostEqual(step_lr.get_last_lr()[0],
                                        new_lr,
                                        places=5)
Example #14
    def train_by_oneflow():
        x = Parameter(flow.Tensor(init_value, device=flow.device(device)))
        ftrl = Ftrl([{
            "params": [x],
            "lr": learning_rate,
            "weight_decay": weight_decay,
            "lr_power": lr_power,
            "initial_accumulator_value": initial_accumulator_value,
            "lambda1": lambda1,
            "lambda2": lambda2,
            "beta": beta,
            "clip_grad_max_norm": clip_grad_max_norm,
            "clip_grad_norm_type": clip_grad_norm_type,
        }])

        def train_one_iter(grad):
            grad_tensor = flow.tensor(
                grad,
                dtype=flow.float32,
                requires_grad=False,
                device=flow.device(device),
            )
            loss = flow.sum(x * grad_tensor)
            loss.backward()
            ftrl.clip_grad()
            ftrl.step()
            ftrl.zero_grad()

        for i in range(train_iters):
            train_one_iter(random_grad_seq[i])
            if i == reload_state_step:
                state_dict = ftrl.state_dict()
                ftrl = Ftrl([{
                    "params": [x],
                }])
                if save_load_by_pickle:
                    with tempfile.TemporaryDirectory() as save_dir:
                        flow.save(state_dict, save_dir)
                        state_dict = flow.load(save_dir)
                ftrl.load_state_dict(state_dict)
        return x
Example #15
    def test_multistep_lr(test_case):
        optimizer = flow.optim.SGD([{
            "params": [Parameter(flow.Tensor([1.0]))]
        }],
                                   lr=TestLrScheduler.base_lr)

        def multistep_lr_step(base_lr, current_step, milestones, gamma):
            count = 0
            for step in milestones:
                if current_step >= step:
                    count += 1
            return base_lr * gamma**count

        gamma = 0.1
        milestones = [5, 11, 15]
        multistep_lr = flow.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones, gamma=gamma)
        for i in range(1, 18):
            multistep_lr.step()
            new_lr = multistep_lr_step(TestLrScheduler.base_lr, i, milestones,
                                       gamma)
            test_case.assertAlmostEqual(multistep_lr.get_last_lr()[0],
                                        new_lr,
                                        places=5)
Example #16
def compare_with_torch_reduce_lr(
    test_case,
    mode,
    factor,
    patience,
    threshold,
    threshold_mode,
    cooldown,
    min_lr,
    eps,
):
    optimizer_flow = flow.optim.SGD(
        [
            {
                "params": [Parameter(flow.Tensor([1.0]))]
            },
        ],
        lr=TestLrScheduler.base_lr,
        momentum=0.9,
    )

    optimizer_torch = torch.optim.SGD(
        [
            {
                "params": [torch.nn.Parameter(torch.Tensor([1.0]))]
            },
        ],
        lr=TestLrScheduler.base_lr,
        momentum=0.9,
    )

    scheduler_flow = flow.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_flow,
        mode,
        factor,
        patience,
        threshold,
        threshold_mode,
        cooldown,
        min_lr,
        eps,
    )
    scheduler_torch = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer_torch,
        mode,
        factor,
        patience,
        threshold,
        threshold_mode,
        cooldown,
        min_lr,
        eps,
    )
    val_loss = 0.1
    for epoch in range(15):
        val_loss += (random.random() - 0.5) / 10
        scheduler_flow.step(val_loss)
        scheduler_torch.step(val_loss)
        for (lr1, lr2) in zip(scheduler_flow._last_lr,
                              scheduler_torch._last_lr):
            test_case.assertAlmostEqual(lr1, lr2, places=5)
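
An illustrative invocation of the comparison helper above (argument values chosen only for demonstration; test_case is the running unittest.TestCase):

compare_with_torch_reduce_lr(
    test_case,
    mode="min",
    factor=0.1,
    patience=2,
    threshold=1e-4,
    threshold_mode="rel",
    cooldown=1,
    min_lr=1e-5,
    eps=1e-8,
)
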
Example #17
def rebuild_empty_parameter(shape, dtype, requires_grad):
    t = flow.tensor([], dtype=dtype)
    t = t.reshape(*shape)
    return Parameter(t, requires_grad=requires_grad)