Example #1
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow


def linear_warmup_decay(init_lr, num_train_steps, num_warmup_steps,
                        main_program):
    """Linear warmup from 0 to init_lr, then linear (power=1) decay to 0."""
    with main_program._lr_schedule_guard():
        global_step = lr_scheduler._decay_step_counter()

        lr = fluid.layers.create_global_var(shape=[1],
                                            value=init_lr,
                                            dtype='float32',
                                            persistable=True,
                                            name="learning_rate")

        with control_flow.Switch() as switch:
            # Warmup phase: LR grows linearly from 0 to init_lr.
            with switch.case(global_step < num_warmup_steps):
                decayed_lr = init_lr * global_step * 1.0 / num_warmup_steps
                fluid.layers.assign(decayed_lr, lr)
            # After warmup: decay linearly (power=1) to 0 over num_train_steps.
            with switch.default():
                decayed_lr = lr_scheduler.polynomial_decay(
                    learning_rate=init_lr,
                    decay_steps=num_train_steps,
                    end_learning_rate=0.0,
                    power=1.0,
                    cycle=False)
                fluid.layers.assign(decayed_lr, lr)

        return lr
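
A minimal usage sketch (not part of the original example), assuming the fluid 1.x static-graph workflow and that train_program, startup_program, loss, num_train_steps and num_warmup_steps are defined elsewhere; the returned LR variable is fed straight to the optimizer:

# Hypothetical wiring of the scheduled LR into an Adam optimizer; the model
# and its loss are assumed to have been built in train_program already.
with fluid.program_guard(train_program, startup_program):
    scheduled_lr = linear_warmup_decay(init_lr=5e-5,
                                       num_train_steps=num_train_steps,
                                       num_warmup_steps=num_warmup_steps,
                                       main_program=train_program)
    optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
    optimizer.minimize(loss)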
Example #2
    def scheduler_handler(self, max_train_steps):
        """Build the scheduled learning rate from the configured warmup,
        noam_decay, linear_decay, slanted_triangle and discriminative
        fine-tuning strategies."""
        scheduled_lr = fluid.layers.create_global_var(shape=[1],
                                                      value=self.learning_rate,
                                                      dtype='float32',
                                                      persistable=True,
                                                      name="learning_rate")

        if not self.scheduler["slanted_triangle"]["cut_fraction"]:
            warmup_steps = int(max_train_steps * self.scheduler["warmup"])
            linear_decay_start = int(
                max_train_steps *
                self.scheduler["linear_decay"]["start_point"])
            if linear_decay_start < warmup_steps:
                logger.warning(
                    "linear decay can not start during warmup process,"
                    "it will start after warmup ends!")
                linear_decay_start = warmup_steps
            if self.scheduler["noam_decay"]:
                if warmup_steps > 0:
                    # d_model is chosen so that the Noam schedule peaks at
                    # self.learning_rate once warmup_steps is reached.
                    scheduled_lr = fluid.layers.learning_rate_scheduler \
                        .noam_decay(1 / (warmup_steps * (self.learning_rate ** 2)),
                                    warmup_steps)
                else:
                    logger.warning(
                        "Noam decay learning rate scheduler should have "
                        "positive warmup steps; using constant learning "
                        "rate instead!")

            if not self.scheduler["noam_decay"] and \
                    (warmup_steps > 0 or self.scheduler["linear_decay"]["start_point"]<1):
                with self.main_program._lr_schedule_guard():
                    global_step = lr_scheduler._decay_step_counter()
                    with control_flow.Switch() as switch:
                        if warmup_steps > 0:
                            # Warmup: LR grows linearly from 0 to
                            # self.learning_rate over warmup_steps steps.
                            with switch.case(global_step < warmup_steps):
                                decayed_lr = self.learning_rate * global_step * 1.0 / warmup_steps
                                fluid.layers.assign(decayed_lr, scheduled_lr)
                        if self.scheduler["linear_decay"]["start_point"] < 1:
                            with switch.case(
                                    global_step >= linear_decay_start):
                                decayed_lr = lr_scheduler.polynomial_decay(
                                    learning_rate=self.learning_rate,
                                    decay_steps=max_train_steps,
                                    end_learning_rate=self.scheduler[
                                        "linear_decay"]["end_learning_rate"],
                                    power=1.0,
                                    cycle=False)
                                fluid.layers.assign(decayed_lr, scheduled_lr)
        else:
            # slanted_triangle enabled: it overrides the other schedulers.
            if self.scheduler["warmup"] or self.scheduler[
                    "noam_decay"] or self.scheduler["linear_decay"][
                        "start_point"] < 1:
                logger.warning(
                    "You are using the slanted_triangle learning rate "
                    "scheduler, which makes warmup, noam_decay and "
                    "linear_decay take no effect.")
            cut_step = int(max_train_steps *
                           self.scheduler["slanted_triangle"]["cut_fraction"])
            ratio = self.scheduler["slanted_triangle"]["ratio"]
            global_step = lr_scheduler._decay_step_counter()
            with control_flow.Switch() as switch:
                # Rising phase: LR grows linearly from learning_rate / ratio
                # up to learning_rate over the first cut_step steps.
                with switch.case(global_step <= cut_step):
                    pct = global_step / cut_step
                    decayed_lr = self.learning_rate * (1 + pct *
                                                       (ratio - 1)) / ratio
                    fluid.layers.assign(decayed_lr, scheduled_lr)
                # Falling phase: LR decreases linearly back towards
                # learning_rate / ratio over the remaining steps.
                with switch.default():
                    pct = 1 - (global_step - cut_step) / (max_train_steps -
                                                          cut_step)
                    decayed_lr = self.learning_rate * (1 + pct *
                                                       (ratio - 1)) / ratio
                    fluid.layers.assign(decayed_lr, scheduled_lr)

        super(CombinedStrategy,
              self).__init__(optimizer_name=self._optimizer_name,
                             learning_rate=scheduled_lr)

        if self.scheduler["discriminative"]["blocks"]:
            _block_layers = math.ceil(
                len(self.sorted_depth) /
                self.scheduler["discriminative"]["blocks"])
            power = 0
            for cnt, depth in enumerate(self.sorted_depth):
                for index, param in enumerate(self.depth_params_dict[depth]):
                    param.optimize_attr["learning_rate"] *= \
                        pow(1.0 / self.scheduler["discriminative"]["factor"], power)
                if cnt and cnt % _block_layers == 0:
                    power += 1
        return scheduled_lr
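
For reference, a plain-Python sketch of the slanted_triangle branch above, evaluated outside the fluid graph. The cut_fraction and ratio defaults are illustrative only, and cut_fraction * max_train_steps is assumed to be at least 1:

def slanted_triangle_lr(step, max_train_steps, base_lr,
                        cut_fraction=0.1, ratio=32):
    # Mirrors the two Switch branches above: the LR rises linearly from
    # base_lr / ratio to base_lr over the first cut_step steps, then falls
    # linearly back towards base_lr / ratio for the remaining steps.
    cut_step = int(max_train_steps * cut_fraction)
    if step <= cut_step:
        pct = step / cut_step
    else:
        pct = 1 - (step - cut_step) / (max_train_steps - cut_step)
    return base_lr * (1 + pct * (ratio - 1)) / ratio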