Example 1
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint,
                               initializers)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if values_encoder is None:
            values_encoder = keys_encoder

        self.attention_keys = get_attention_states(keys_encoder)
        self.attention_mask = get_attention_mask(keys_encoder)
        self.attention_values = get_attention_states(values_encoder)
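The constructor checks that dropout_keep_prob lies in (0, 1]: the value is the probability of keeping a unit, so 1.0 disables dropout entirely. Below is a minimal NumPy sketch of inverted dropout under that convention; it is an illustration only, not the library's implementation, and the function name and arguments are hypothetical.

    import numpy as np

    def dropout(x, keep_prob, training, rng=np.random):
        # keep_prob is the probability of keeping a unit, hence the
        # (0, 1] check above; keep_prob == 1.0 is a no-op.
        if not training or keep_prob >= 1.0:
            return x
        mask = rng.binomial(1, keep_prob, size=x.shape)
        return x * mask / keep_prob  # inverted dropout: rescale at train time

    activations = dropout(np.ones((2, 3)), keep_prob=0.8, training=True)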
Example 2
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        self.keys_encoder = keys_encoder

        if values_encoder is not None:
            self.values_encoder = values_encoder
        else:
            self.values_encoder = self.keys_encoder

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        self._variable_scope.set_initializer(
            tf.variance_scaling_initializer(mode="fan_avg",
                                            distribution="uniform"))
Example 3
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        self.keys_encoder = keys_encoder

        if values_encoder is not None:
            self.values_encoder = values_encoder
        else:
            self.values_encoder = self.keys_encoder

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        self._variable_scope.set_initializer(tf.variance_scaling_initializer(
            mode="fan_avg", distribution="uniform"))
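Examples 2 and 3 differ only in line wrapping; both set the scope initializer to tf.variance_scaling_initializer(mode="fan_avg", distribution="uniform"), which for the default scale of 1.0 is the Glorot (Xavier) uniform rule. A small NumPy sketch of the sampling, assuming a 2-D weight of shape (fan_in, fan_out); the helper name is hypothetical.

    import numpy as np

    def fan_avg_uniform(fan_in, fan_out, scale=1.0, rng=np.random):
        # variance_scaling with mode="fan_avg", distribution="uniform":
        # n = (fan_in + fan_out) / 2, samples drawn from U(-limit, limit)
        # with limit = sqrt(3 * scale / n) = sqrt(6 * scale / (fan_in + fan_out)).
        limit = np.sqrt(3.0 * scale / ((fan_in + fan_out) / 2.0))
        return rng.uniform(-limit, limit, size=(fan_in, fan_out))

    weights = fan_avg_uniform(512, 256)  # same distribution as Glorot uniform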
Example 4
    def __init__(self,
                 name: str,
                 encoder: Stateful,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint,
                               initializers)

        self.encoder = encoder
Example 5
    def __init__(self,
                 name: str,
                 encoder: Stateful,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)

        self.encoder = encoder
Example 6
    def __init__(self,
                 name: str,
                 encoder: Attendable,
                 dropout_keep_prob: float = 1.0,
                 state_size: int = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint)

        self.encoder = encoder
        self.dropout_keep_prob = dropout_keep_prob
        self._state_size = state_size

        # TODO blessing
        log("Hidden features: {}".format(self.hidden_features))
        log("Attention mask: {}".format(self.attention_mask))
Example 7
    def __init__(self,
                 name: str,
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)
        self.attentions_in_time = []  # type: List[tf.Tensor]
        self.attention_state_size = attention_state_size
        self._share_projections = share_attn_projections
        self._use_sentinels = use_sentinels

        self.att_scope_name = "attention_{}".format(name)
Example 8
    def __init__(self,
                 name: str,
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint)
        self.attentions_in_time = []  # type: List[tf.Tensor]
        self.attention_state_size = attention_state_size
        self._share_projections = share_attn_projections
        self._use_sentinels = use_sentinels

        self.att_scope_name = "attention_{}".format(name)

        with self.use_scope():
            self.attn_v = tf.get_variable(
                "attn_v", [1, 1, self.attention_state_size],
                initializer=tf.glorot_normal_initializer())
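The attn_v variable created above, of shape [1, 1, attention_state_size], is the scoring vector used in additive (Bahdanau-style) attention. The class's actual attention method is not shown here; the NumPy sketch below only illustrates the standard scoring rule such a vector implements, with hypothetical projected_keys and projected_query inputs.

    import numpy as np

    def additive_scores(projected_keys, projected_query, attn_v):
        # projected_keys:  (batch, time, state_size)
        # projected_query: (batch, 1, state_size)
        # attn_v:          (1, 1, state_size)
        # Bahdanau scoring: e_t = v . tanh(W_k k_t + W_q q)
        return np.sum(attn_v * np.tanh(projected_keys + projected_query), axis=-1)

    batch, time, size = 2, 5, 8
    scores = additive_scores(np.random.randn(batch, time, size),
                             np.random.randn(batch, 1, size),
                             np.random.randn(1, 1, size))
    weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax over time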
Example 9
    def __init__(self,
                 name: str,
                 encoder: Attendable,
                 dropout_keep_prob: float = 1.0,
                 state_size: int = None,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(
            self, name, reuse, save_checkpoint, load_checkpoint, initializers)

        self.encoder = encoder
        self.dropout_keep_prob = dropout_keep_prob
        self._state_size = state_size

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))
Example 10
    def __init__(self,
                 name: str,
                 encoder: Attendable,
                 dropout_keep_prob: float = 1.0,
                 state_size: int = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(
            self, name, save_checkpoint, load_checkpoint, initializers)

        self.encoder = encoder
        self.dropout_keep_prob = dropout_keep_prob
        self._state_size = state_size

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))

        # TODO blessing
        log("Hidden features: {}".format(self.hidden_features))
        log("Attention mask: {}".format(self.attention_mask))
Example 11
    def __init__(self,
                 name: str,
                 attention_state_size: int,
                 share_attn_projections: bool = False,
                 use_sentinels: bool = False,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)
        self.attentions_in_time = []  # type: List[tf.Tensor]
        self.attention_state_size = attention_state_size
        self._share_projections = share_attn_projections
        self._use_sentinels = use_sentinels

        self.att_scope_name = "attention_{}".format(name)

        with self.use_scope():
            self.attn_v = get_variable(
                "attn_v", [1, 1, self.attention_state_size],
                initializer=tf.random_normal_initializer(stddev=0.001))
Example 12
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, save_checkpoint, load_checkpoint)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        if values_encoder is None:
            values_encoder = keys_encoder

        self.attention_keys = get_attention_states(keys_encoder)
        self.attention_values = get_attention_states(values_encoder)
        self.attention_mask = get_attention_mask(keys_encoder)

        self._dimension = self.attention_keys.get_shape()[-1].value

        if self._dimension % self.n_heads != 0:
            raise ValueError("Model dimension ({}) must be divisible by the "
                             "number of attention heads ({})".format(
                                 self._dimension, self.n_heads))

        self._head_dim = int(self._dimension / self.n_heads)
        self._scaling_factor = 1 / math.sqrt(self._head_dim)
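The last example checks that the model dimension is divisible by n_heads and derives the per-head size and the 1/sqrt(head_dim) scaling factor. The sketch below shows, in plain NumPy, how those two quantities enter a scaled dot-product attention computation; it omits the attention mask and dropout, uses hypothetical names, and is not the library's implementation.

    import math
    import numpy as np

    def multi_head_scaled_dot_product(queries, keys, values, n_heads):
        # queries: (batch, time_q, dim); keys/values: (batch, time_k, dim)
        # dim must be divisible by n_heads, mirroring the constructor's check.
        dim = queries.shape[-1]
        head_dim = dim // n_heads
        scale = 1.0 / math.sqrt(head_dim)  # the _scaling_factor above

        def split_heads(x):
            # (batch, time, dim) -> (batch, n_heads, time, head_dim)
            b, t, _ = x.shape
            return x.reshape(b, t, n_heads, head_dim).transpose(0, 2, 1, 3)

        q, k, v = split_heads(queries), split_heads(keys), split_heads(values)
        logits = scale * (q @ k.transpose(0, 1, 3, 2))  # (batch, heads, time_q, time_k)
        weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
        weights /= weights.sum(axis=-1, keepdims=True)  # softmax over keys
        context = weights @ v                           # (batch, heads, time_q, head_dim)
        b, h, t, d = context.shape
        return context.transpose(0, 2, 1, 3).reshape(b, t, h * d)

    out = multi_head_scaled_dot_product(np.random.randn(2, 4, 16),
                                        np.random.randn(2, 6, 16),
                                        np.random.randn(2, 6, 16),
                                        n_heads=4)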