Exemplo n.º 1
0
    def transduce(self, embed_sent: ExpressionSequence) -> ExpressionSequence:
        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        batch_size = src.dim()[1]
        pad_size = (self.window_receptor -
                    1) / 2  #TODO adapt it also for even window size

        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)
        ],
                             d=1)
        padded_sent_len = sent_len + 2 * pad_size

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1),
                                  batch_size=batch_size)
        if self.non_linearity is 'linear':
            hidden_layer = hidden_layer
        elif self.non_linearity is 'tanh':
            hidden_layer = dy.tanh(hidden_layer)
        elif self.non_linearity is 'relu':
            hidden_layer = dy.rectify(hidden_layer)
        elif self.non_linearity is 'sigmoid':
            hidden_layer = dy.logistic(hidden_layer)

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = dy.reshape(hidden_layer,
                                      (self.internal_dim, sent_len, 1),
                                      batch_size=batch_size)
            if self.non_linearity is 'linear':
                hidden_layer = hidden_layer
            elif self.non_linearity is 'tanh':
                hidden_layer = dy.tanh(hidden_layer)
            elif self.non_linearity is 'relu':
                hidden_layer = dy.rectify(hidden_layer)
            elif self.non_linearity is 'sigmoid':
                hidden_layer = dy.logistic(hidden_layer)
        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Exemplo n.º 2
0
    def transduce(self, es: ExpressionSequence) -> ExpressionSequence:
        """
    returns the list of output Expressions obtained by adding the given inputs
    to the current state, one by one.

    Args:
      es: an ExpressionSequence
    """
        es_list = [es]

        for layer_i, fb in enumerate(self.builder_layers):
            reduce_factor = self._reduce_factor_for_layer(layer_i)

            if es_list[0].mask is None: mask_out = None
            else: mask_out = es_list[0].mask.lin_subsampled(reduce_factor)

            if self.downsampling_method == "concat" and len(
                    es_list[0]) % reduce_factor != 0:
                raise ValueError(
                    f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, "
                    f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. "
                    f"Set Batcher's pad_src_to_multiple argument accordingly.")
            fs = fb.transduce(es_list)
            if layer_i < len(self.builder_layers) - 1:
                if self.downsampling_method == "skip":
                    es_list = [
                        ExpressionSequence(expr_list=fs[::reduce_factor],
                                           mask=mask_out)
                    ]
                elif self.downsampling_method == "concat":
                    es_len = len(es_list[0])
                    es_list_fwd = []
                    for i in range(0, es_len, reduce_factor):
                        for j in range(reduce_factor):
                            if i == 0:
                                es_list_fwd.append([])
                            es_list_fwd[j].append(fs[i + j])
                    es_list = [
                        ExpressionSequence(expr_list=es_list_fwd[j],
                                           mask=mask_out)
                        for j in range(reduce_factor)
                    ]
                else:
                    raise RuntimeError(
                        f"unknown downsampling_method {self.downsampling_method}"
                    )
            else:
                # concat final outputs
                ret_es = ExpressionSequence(expr_list=[f for f in fs],
                                            mask=mask_out)

        self._final_states = [
            FinalTransducerState(fb.get_final_states()[0].main_expr(),
                                 fb.get_final_states()[0].cell_expr())
            for fb in self.builder_layers
        ]

        return ret_es
Exemplo n.º 3
0
    def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
        """
    transduce the sequence

    Args:
      expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
    Returns:
      expression sequence
    """

        Wq, Wk, Wv, Wo = [
            dy.parameter(x) for x in (self.pWq, self.pWk, self.pWv, self.pWo)
        ]
        bq, bk, bv, bo = [
            dy.parameter(x) for x in (self.pbq, self.pbk, self.pbv, self.pbo)
        ]

        # Start with a [(length, model_size) x batch] tensor
        x = expr_seq.as_transposed_tensor()
        x_len = x.dim()[0][0]
        x_batch = x.dim()[1]
        # Get the query key and value vectors
        # TODO: do we need bias broadcasting in DyNet?
        # q = dy.affine_transform([bq, x, Wq])
        # k = dy.affine_transform([bk, x, Wk])
        # v = dy.affine_transform([bv, x, Wv])
        q = bq + x * Wq
        k = bk + x * Wk
        v = bv + x * Wv

        # Split to batches [(length, head_dim) x batch * num_heads] tensor
        q, k, v = [
            dy.reshape(x, (x_len, self.head_dim),
                       batch_size=x_batch * self.num_heads) for x in (q, k, v)
        ]

        # Do scaled dot product [(length, length) x batch * num_heads], rows are queries, columns are keys
        attn_score = q * dy.transpose(k) / sqrt(self.head_dim)
        if expr_seq.mask is not None:
            mask = dy.inputTensor(np.repeat(
                expr_seq.mask.np_arr, self.num_heads, axis=0).transpose(),
                                  batched=True) * -1e10
            attn_score = attn_score + mask
        attn_prob = dy.softmax(attn_score, d=1)
        # Reduce using attention and resize to match [(length, model_size) x batch]
        o = dy.reshape(attn_prob * v, (x_len, self.input_dim),
                       batch_size=x_batch)
        # Final transformation
        # o = dy.affine_transform([bo, attn_prob * v, Wo])
        o = bo + o * Wo

        expr_seq = ExpressionSequence(expr_transposed_tensor=o,
                                      mask=expr_seq.mask)

        self._final_states = [FinalTransducerState(expr_seq[-1], None)]

        return expr_seq
Exemplo n.º 4
0
  def transduce(self, es: ExpressionSequence) -> ExpressionSequence:
    """
    returns the list of output Expressions obtained by adding the given inputs
    to the current state, one by one, to both the forward and backward RNNs,
    and concatenating.

    Args:
      es: an ExpressionSequence
    """
    es_list = [es]

    for layer_i, (fb, bb) in enumerate(self.builder_layers):
      reduce_factor = self._reduce_factor_for_layer(layer_i)

      if es_list[0].mask is None: mask_out = None
      else: mask_out = es_list[0].mask.lin_subsampled(reduce_factor)

      if self.downsampling_method=="concat" and len(es_list[0]) % reduce_factor != 0:
        raise ValueError(f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, "
                         f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. "
                         f"Set Batcher's pad_src_to_multiple argument accordingly.")
      fs = fb.transduce(es_list)
      bs = bb.transduce([ReversedExpressionSequence(es_item) for es_item in es_list])
      if layer_i < len(self.builder_layers) - 1:
        if self.downsampling_method=="skip":
          es_list = [ExpressionSequence(expr_list=fs[::reduce_factor], mask=mask_out),
                     ExpressionSequence(expr_list=bs[::reduce_factor][::-1], mask=mask_out)]
        elif self.downsampling_method=="concat":
          es_len = len(es_list[0])
          es_list_fwd = []
          es_list_bwd = []
          for i in range(0, es_len, reduce_factor):
            for j in range(reduce_factor):
              if i==0:
                es_list_fwd.append([])
                es_list_bwd.append([])
              es_list_fwd[j].append(fs[i+j])
              es_list_bwd[j].append(bs[len(es_list[0])-reduce_factor+j-i])
          es_list = [ExpressionSequence(expr_list=es_list_fwd[j], mask=mask_out) for j in range(reduce_factor)] + \
                    [ExpressionSequence(expr_list=es_list_bwd[j], mask=mask_out) for j in range(reduce_factor)]
        else:
          raise RuntimeError(f"unknown downsampling_method {self.downsampling_method}")
      else:
        # concat final outputs
        ret_es = ExpressionSequence(
          expr_list=[dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs))], mask=mask_out)

    self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(),
                                                               bb.get_final_states()[0].main_expr()]),
                                               dy.concatenate([fb.get_final_states()[0].cell_expr(),
                                                               bb.get_final_states()[0].cell_expr()])) \
                          for (fb, bb) in self.builder_layers]

    return ret_es
Exemplo n.º 5
0
 def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
   sent_len = len(src)
   embeddings = dy.strided_select(dy.parameter(self.embedder), [1,1], [0,0], [self.input_dim, sent_len])
   if self.op == 'sum':
     output = embeddings + src.as_tensor()
   elif self.op == 'concat':
     output = dy.concatenate([embeddings, src.as_tensor()])
   else:
     raise ValueError(f'Illegal op {op} in PositionalTransducer (options are "sum"/"concat")')
   output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask)
   self._final_states = [FinalTransducerState(output_seq[-1])]
   return output_seq