Example #1
def extend_with_piecewise_linear_lr(base_optimizer, name=None):
    """返回新的优化器类,加入分段线性学习率
    """
    class new_optimizer(base_optimizer):
        """带有分段线性学习率的优化器
        其中schedule是形如{1000: 1, 2000: 0.1}的字典,
        表示0~1000步内学习率线性地从零增加到100%,然后
        1000~2000步内线性地降到10%,2000步以后保持10%
        """
        def __init__(self, lr_schedule, *args, **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.lr_schedule = {int(i): j for i, j in lr_schedule.items()}

        def _decayed_lr(self, var_dtype):
            lr_multiplier = piecewise_linear(self.iterations, self.lr_schedule)
            lr_t = super(new_optimizer, self)._decayed_lr(var_dtype)
            return lr_t * K.cast(lr_multiplier, var_dtype)

        def get_config(self):
            config = {'lr_schedule': self.lr_schedule}
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
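
A minimal usage sketch for this extension (the bert4keras.optimizers module path and the Adam base class are assumptions, not shown in the snippet):

# Hedged sketch: the import below is an assumption.
from bert4keras.optimizers import Adam, extend_with_piecewise_linear_lr

AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')
# Warm up to 100% of learning_rate by step 1000, then decay to 10% by step 2000.
optimizer = AdamLR(learning_rate=1e-3, lr_schedule={1000: 1, 2000: 0.1})
# `optimizer` can then be passed to model.compile(...) as usual.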
Example #2
    def encode(self,
               first_text,
               second_text=None,
               max_length=None,
               first_length=None,
               second_length=None):
        """输出文本对应token id和segment id
        如果传入first_length,则强行padding第一个句子到指定长度;
        同理,如果传入second_length,则强行padding第二个句子到指定长度。
        """
        if is_string(first_text):
            first_tokens = self.tokenize(first_text)
        else:
            first_tokens = first_text

        if second_text is None:
            second_tokens = None
        elif is_string(second_text):
            second_tokens = self.tokenize(second_text, add_cls=False)
        else:
            second_tokens = second_text

        if max_length is not None:
            self.truncate_sequence(max_length, first_tokens, second_tokens, -2)

        first_token_ids = self.tokens_to_ids(first_tokens)
        if first_length is not None:
            first_token_ids = first_token_ids[:first_length]
            first_token_ids.extend([self._token_pad_id] *
                                   (first_length - len(first_token_ids)))
        first_segment_ids = [0] * len(first_token_ids)

        if second_text is not None:
            second_token_ids = self.tokens_to_ids(second_tokens)
            if second_length is not None:
                second_token_ids = second_token_ids[:second_length]
                second_token_ids.extend(
                    [self._token_pad_id] *
                    (second_length - len(second_token_ids)))
            second_segment_ids = [1] * len(second_token_ids)

            first_token_ids.extend(second_token_ids)
            first_segment_ids.extend(second_segment_ids)

        return first_token_ids, first_segment_ids
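
A minimal usage sketch for encode (the Tokenizer import path and the vocab file name are assumptions):

# Hedged sketch: the import path and 'vocab.txt' are assumed.
from bert4keras.tokenizers import Tokenizer

tokenizer = Tokenizer('vocab.txt', do_lower_case=True)
token_ids, segment_ids = tokenizer.encode('how are you', 'fine, thanks',
                                          max_length=32)
# segment_ids are 0 for the first sentence and 1 for the second.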
Example #3
def extend_with_layer_adaptation(base_optimizer, name=None):
    """返回新的优化器类,加入层自适应学习率
    """
    class new_optimizer(base_optimizer):
        """带有层自适应学习率的优化器
        用每一层参数的模长来校正当前参数的学习率
        https://arxiv.org/abs/1904.00962
        """
        def __init__(self,
                     exclude_from_layer_adaptation=None,
                     *args,
                     **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.exclude_from_layer_adaptation = exclude_from_layer_adaptation or []

        def _resource_apply_op(self, grad, var, indices=None):
            old_update = K.update

            def new_update(x, new_x):
                if x is var and self._do_layer_adaptation(x):
                    dx = new_x - x
                    lr_t = self._decayed_lr(x.dtype.base_dtype)
                    lr_t = K.clip(lr_t, K.epsilon(), 1e10)
                    x_norm = tf.norm(x)
                    g_norm = tf.norm(dx / lr_t)
                    ratio = K.switch(
                        x_norm > 0.,
                        K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
                        1.)
                    new_x = x + dx * ratio
                return old_update(x, new_x)

            K.update = new_update
            op = super(new_optimizer,
                       self)._resource_apply_op(grad, var, indices)
            K.update = old_update

            return op

        def _do_layer_adaptation(self, w):
            return (not string_matching(w.name,
                                        self.exclude_from_layer_adaptation))

        def get_config(self):
            config = {
                'exclude_from_layer_adaptation':
                self.exclude_from_layer_adaptation
            }
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
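
A usage sketch under the same assumed Adam base class; the exclusion patterns are illustrative:

# Hedged sketch: the Adam import and exclusion patterns are assumptions.
from bert4keras.optimizers import Adam, extend_with_layer_adaptation

AdamLA = extend_with_layer_adaptation(Adam, name='AdamLA')
optimizer = AdamLA(learning_rate=1e-3,
                   exclude_from_layer_adaptation=['Norm', 'bias'])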
Example #4
def extend_with_lookahead(base_optimizer, name=None):
    """返回新的优化器类,加入look ahead
    """
    class new_optimizer(base_optimizer):
        """带有look ahead的优化器
        https://arxiv.org/abs/1907.08610
        steps_per_slow_update: 即论文中的k;
        slow_step_size: 即论文中的alpha。
        """
        def __init__(self,
                     steps_per_slow_update=5,
                     slow_step_size=0.5,
                     *args,
                     **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.steps_per_slow_update = steps_per_slow_update
            self.slow_step_size = slow_step_size

        def _create_slots(self, var_list):
            super(new_optimizer, self)._create_slots(var_list)
            for var in var_list:
                self.add_slot(var, 'slow_var')

        def _resource_apply_op(self, grad, var, indices=None):
            op = super(new_optimizer,
                       self)._resource_apply_op(grad, var, indices)

            k, alpha = self.steps_per_slow_update, self.slow_step_size
            cond = K.equal(self.iterations % k, 0)
            slow_var = self.get_slot(var, 'slow_var')
            slow_var_t = slow_var + alpha * (var - slow_var)

            with tf.control_dependencies([op]):
                slow_update = K.update(slow_var,
                                       K.switch(cond, slow_var_t, slow_var))
                with tf.control_dependencies([slow_update]):
                    copy_update = K.update(var, K.switch(cond, slow_var, var))

            return copy_update

        def get_config(self):
            config = {
                'steps_per_slow_update': self.steps_per_slow_update,
                'slow_step_size': self.slow_step_size
            }
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
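
A usage sketch, again assuming an importable Adam base class:

# Hedged sketch: the Adam import is an assumption.
from bert4keras.optimizers import Adam, extend_with_lookahead

AdamLookahead = extend_with_lookahead(Adam, name='AdamLookahead')
optimizer = AdamLookahead(learning_rate=1e-3,
                          steps_per_slow_update=5,  # k in the paper
                          slow_step_size=0.5)       # alpha in the paper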
Example #5
def extend_with_gradient_accumulation(base_optimizer, name=None):
    """返回新的优化器类,加入梯度累积
    """
    class new_optimizer(base_optimizer):
        """带有梯度累积的优化器
        """
        def __init__(self, grad_accum_steps, *args, **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.grad_accum_steps = grad_accum_steps

        def _create_slots(self, var_list):
            super(new_optimizer, self)._create_slots(var_list)
            for var in var_list:
                self.add_slot(var, 'ag')

        def _resource_apply_op(self, grad, var, indices=None):
            # Condition for applying the accumulated update
            cond = K.equal(self.iterations % self.grad_accum_steps, 0)
            # Fetch the accumulated-gradient slot
            ag = self.get_slot(var, 'ag')

            old_update = K.update

            def new_update(x, new_x):
                new_x = K.switch(cond, new_x, x)
                return old_update(x, new_x)

            K.update = new_update
            ag_t = ag / self.grad_accum_steps
            op = super(new_optimizer, self)._resource_apply_op(ag_t, var)
            K.update = old_update

            # Accumulate the gradient
            with tf.control_dependencies([op]):
                ag_t = K.switch(cond, K.zeros_like(ag), ag)
                with tf.control_dependencies([K.update(ag, ag_t)]):
                    if indices is None:
                        ag_t = K.update(ag, ag + grad)
                    else:
                        ag_t = self._resource_scatter_add(ag, indices, grad)

            return ag_t

        def get_config(self):
            config = {'grad_accum_steps': self.grad_accum_steps}
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
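
A usage sketch under the same assumptions:

# Hedged sketch: the Adam import is an assumption.
from bert4keras.optimizers import Adam, extend_with_gradient_accumulation

AdamGA = extend_with_gradient_accumulation(Adam, name='AdamGA')
# With grad_accum_steps=4, the effective batch size is 4x the actual one.
optimizer = AdamGA(learning_rate=1e-3, grad_accum_steps=4)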
Example #6
def extend_with_lazy_optimization(base_optimizer, name=None):
    """返回新的优化器类,加入懒惰更新
    """
    class new_optimizer(base_optimizer):
        """带有懒惰更新的优化器
        使得部分权重(尤其是embedding)只有在梯度不等于0时
        才发生更新。
        """
        def __init__(self, include_in_lazy_optimization=None, *args, **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.include_in_lazy_optimization = include_in_lazy_optimization or []

        def _resource_apply_op(self, grad, var, indices=None):
            old_update = K.update

            def new_update(x, new_x):
                if x is var and self._do_lazy_optimization(x):
                    if indices is None:
                        r = K.any(K.not_equal(grad, 0.),
                                  axis=-1,
                                  keepdims=True)
                        new_x = x + (new_x - x) * K.cast(r, K.floatx())
                        return old_update(x, new_x)
                    else:
                        return self._resource_scatter_add(
                            x, indices, K.gather(new_x - x, indices))
                return old_update(x, new_x)

            K.update = new_update
            op = super(new_optimizer,
                       self)._resource_apply_op(grad, var, indices)
            K.update = old_update

            return op

        def _do_lazy_optimization(self, w):
            return string_matching(w.name, self.include_in_lazy_optimization)

        def get_config(self):
            config = {
                'include_in_lazy_optimization':
                self.include_in_lazy_optimization
            }
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
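
A usage sketch; the 'Embedding' pattern is illustrative:

# Hedged sketch: the Adam import and the pattern are assumptions.
from bert4keras.optimizers import Adam, extend_with_lazy_optimization

AdamLazy = extend_with_lazy_optimization(Adam, name='AdamLazy')
optimizer = AdamLazy(learning_rate=1e-3,
                     include_in_lazy_optimization=['Embedding'])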
Example #7
def extend_with_weight_decay(base_optimizer, name=None):
    """返回新的优化器类,加入权重衰减
    """
    class new_optimizer(base_optimizer):
        """带有权重衰减的优化器
        """
        def __init__(self,
                     weight_decay_rate,
                     exclude_from_weight_decay=None,
                     *args,
                     **kwargs):
            super(new_optimizer, self).__init__(*args, **kwargs)
            self.weight_decay_rate = weight_decay_rate
            self.exclude_from_weight_decay = exclude_from_weight_decay or []

        def _resource_apply_op(self, grad, var, indices=None):
            old_update = K.update

            def new_update(x, new_x):
                if x is var and self._do_weight_decay(x):
                    lr_t = self._decayed_lr(x.dtype.base_dtype)
                    new_x = new_x - lr_t * self.weight_decay_rate * x
                return old_update(x, new_x)

            K.update = new_update
            op = super(new_optimizer,
                       self)._resource_apply_op(grad, var, indices)
            K.update = old_update

            return op

        def _do_weight_decay(self, w):
            return (not string_matching(w.name,
                                        self.exclude_from_weight_decay))

        def get_config(self):
            config = {
                'weight_decay_rate': self.weight_decay_rate,
                'exclude_from_weight_decay': self.exclude_from_weight_decay
            }
            base_config = super(new_optimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    if is_string(name):
        new_optimizer.__name__ = name
        keras.utils.get_custom_objects()[name] = new_optimizer

    return new_optimizer
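
A usage sketch under the same assumptions. Because every extension returns a subclass of its argument, these extensions can also be stacked, e.g. extend_with_weight_decay(extend_with_piecewise_linear_lr(Adam)):

# Hedged sketch: the Adam import and exclusion patterns are assumptions.
from bert4keras.optimizers import Adam, extend_with_weight_decay

AdamW = extend_with_weight_decay(Adam, name='AdamW')
optimizer = AdamW(learning_rate=1e-3,
                  weight_decay_rate=0.01,
                  exclude_from_weight_decay=['Norm', 'bias'])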
Example #8
    def __init__(self, token_dict, do_lower_case=False):
        """初始化
        """
        super(Tokenizer, self).__init__(do_lower_case)
        if is_string(token_dict):
            token_dict = load_vocab(token_dict)

        self._token_dict = token_dict
        self._token_dict_inv = {v: k for k, v in token_dict.items()}
        for token in ['pad', 'cls', 'sep', 'unk', 'mask']:
            try:
                _token_id = token_dict[getattr(self, '_token_%s' % token)]
                setattr(self, '_token_%s_id' % token, _token_id)
            except:
                pass
        self._vocab_size = len(token_dict)
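
A minimal construction sketch ('vocab.txt' is a placeholder path; load_vocab is assumed to turn the vocab file into a token-to-id dict):

# Hedged sketch: the vocab path is a placeholder.
tokenizer = Tokenizer('vocab.txt', do_lower_case=True)
# Special-token ids (_token_cls_id, _token_sep_id, ...) are only set when the
# corresponding tokens exist in the vocab; missing ones are silently skipped above.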