def exponential_decay_with_warmup(learning_rate,
                                  step_each_epoch,
                                  decay_epochs,
                                  decay_rate=0.97,
                                  warm_up_epoch=5.0):
    """Applies exponential decay to the learning rate, with linear warmup.

    During the first ``warm_up_epoch`` epochs the rate grows linearly from
    0 up to ``learning_rate``; afterwards it is multiplied by ``decay_rate``
    once every ``decay_epochs`` steps.

    Args:
        learning_rate (float): base learning rate reached at end of warmup.
        step_each_epoch (int): number of mini-batch steps per epoch.
        decay_epochs (int): interval between successive decays.
            NOTE(review): the divisor is applied to a step counter, so this
            looks like it is measured in steps, not epochs — confirm with
            callers.
        decay_rate (float): multiplicative decay factor per interval.
        warm_up_epoch (float): number of warmup epochs.

    Returns:
        Variable: global scalar variable updated each step with the rate.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear warmup: ramp from 0 to learning_rate.
                decayed_lr = learning_rate * (
                    global_step / (step_each_epoch * warmup_epoch))
                # tensor.assign is the canonical path used by the other
                # schedules in this file; keep it consistent here.
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                div_res = (global_step -
                           warmup_epoch * step_each_epoch) / decay_epochs
                div_res = ops.floor(div_res)
                decayed_lr = learning_rate * (decay_rate**div_res)
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
def __call__(self):
    """Build the warmup-then-exponential-decay learning rate graph.

    Ramps the rate linearly from 0 to ``self.lr`` during the first
    ``self.warmup_epoch`` epochs, then multiplies it by ``self.decay_rate``
    once every ``self.decay_epochs`` steps.

    Returns:
        Variable: global scalar variable updated each step with the rate.
    """
    step = _decay_step_counter()
    lr_var = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    cur_epoch = ops.floor(step / self.step_each_epoch)
    warmup_steps = self.step_each_epoch * self.warmup_epoch
    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(cur_epoch < self.warmup_epoch):
            # Still warming up: ramp linearly towards the base rate.
            warm_lr = self.lr * (step / warmup_steps)
            fluid.layers.tensor.assign(input=warm_lr, output=lr_var)
        with switch.default():
            # Steps elapsed since warmup finished drive the decay count.
            steps_after_warmup = step - warmup_steps
            intervals = ops.floor(steps_after_warmup / self.decay_epochs)
            decayed = self.lr * (self.decay_rate**intervals)
            fluid.layers.tensor.assign(input=decayed, output=lr_var)
    return lr_var
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=120,
                             warm_up_epoch=5.0):
    """Applies cosine decay to the learning rate, with linear warmup.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)

    The rate decreases every mini-batch; the first ``warm_up_epoch`` epochs
    ramp linearly from 0 up to ``learning_rate`` before the cosine phase.

    Args:
        learning_rate (float): peak learning rate, reached at end of warmup.
        step_each_epoch (int): number of mini-batch steps per epoch.
        epochs (int): total epochs defining the cosine half-period.
        warm_up_epoch (float): number of warmup epochs. Defaults to 5.0,
            preserving the previously hard-coded value.

    Returns:
        Variable: global scalar variable updated each step with the rate.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")
    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear warmup: ramp from 0 to learning_rate.
                decayed_lr = learning_rate * (
                    global_step / (step_each_epoch * warmup_epoch))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Cosine phase starts counting from the end of warmup.
                # NOTE(review): the half-period is epochs * step_each_epoch
                # but the phase starts after warmup, so lr does not reach 0
                # exactly at `epochs` — confirm this is intended.
                decayed_lr = learning_rate * (ops.cos(
                    (global_step - warmup_epoch * step_each_epoch) *
                    (math.pi / (epochs * step_each_epoch))) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Anneal the learning rate along a half cosine curve.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    step = _decay_step_counter()
    # floor() keeps the rate constant within an epoch: it only changes
    # when the epoch index advances.
    cur_epoch = ops.floor(step / step_each_epoch)
    # cos() runs from 1 at epoch 0 down to -1 at `epochs`; the +1 shift
    # and halving map it onto a [0, 1] multiplier for the base rate.
    phase = cur_epoch * (math.pi / epochs)
    scale = (ops.cos(phase) + 1) / 2
    return learning_rate * scale
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Applies cosine decay to the learning rate.

    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)

    Args:
        learning_rate (float): initial (peak) learning rate at epoch 0.
        step_each_epoch (int): number of mini-batch steps per epoch.
        epochs (int): epoch at which the cosine reaches its minimum
            (half period of the cosine curve).

    Returns:
        Variable: per-step decayed learning rate tensor.
    """
    global_step = _decay_step_counter()
    with init_on_cpu():
        # Decay once per epoch: floor() keeps the rate constant within an
        # epoch instead of shrinking it every mini-batch.
        epoch = ops.floor(global_step / step_each_epoch)
        decayed_lr = learning_rate * \
            (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
    return decayed_lr