def transduce(self, embed_sent: ExpressionSequence) -> ExpressionSequence: src = embed_sent.as_tensor() sent_len = src.dim()[0][1] batch_size = src.dim()[1] pad_size = (self.window_receptor - 1) / 2 #TODO adapt it also for even window size src = dy.concatenate([ dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src, dy.zeroes((self.input_dim, pad_size), batch_size=batch_size) ], d=1) padded_sent_len = sent_len + 2 * pad_size conv1 = dy.parameter(self.pConv1) bias1 = dy.parameter(self.pBias1) src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1), batch_size=batch_size) cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1]) hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1), batch_size=batch_size) if self.non_linearity is 'linear': hidden_layer = hidden_layer elif self.non_linearity is 'tanh': hidden_layer = dy.tanh(hidden_layer) elif self.non_linearity is 'relu': hidden_layer = dy.rectify(hidden_layer) elif self.non_linearity is 'sigmoid': hidden_layer = dy.logistic(hidden_layer) for conv_hid, bias_hid in self.builder_layers: hidden_layer = dy.conv2d_bias(hidden_layer, dy.parameter(conv_hid), dy.parameter(bias_hid), stride=[1, 1]) hidden_layer = dy.reshape(hidden_layer, (self.internal_dim, sent_len, 1), batch_size=batch_size) if self.non_linearity is 'linear': hidden_layer = hidden_layer elif self.non_linearity is 'tanh': hidden_layer = dy.tanh(hidden_layer) elif self.non_linearity is 'relu': hidden_layer = dy.rectify(hidden_layer) elif self.non_linearity is 'sigmoid': hidden_layer = dy.logistic(hidden_layer) last_conv = dy.parameter(self.last_conv) last_bias = dy.parameter(self.last_bias) output = dy.conv2d_bias(hidden_layer, last_conv, last_bias, stride=[1, 1]) output = dy.reshape(output, (sent_len, self.output_dim), batch_size=batch_size) output_seq = ExpressionSequence(expr_tensor=output) self._final_states = [FinalTransducerState(output_seq[-1])] return output_seq
def transduce(self, es: ExpressionSequence) -> ExpressionSequence: """ returns the list of output Expressions obtained by adding the given inputs to the current state, one by one. Args: es: an ExpressionSequence """ es_list = [es] for layer_i, fb in enumerate(self.builder_layers): reduce_factor = self._reduce_factor_for_layer(layer_i) if es_list[0].mask is None: mask_out = None else: mask_out = es_list[0].mask.lin_subsampled(reduce_factor) if self.downsampling_method == "concat" and len( es_list[0]) % reduce_factor != 0: raise ValueError( f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, " f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. " f"Set Batcher's pad_src_to_multiple argument accordingly.") fs = fb.transduce(es_list) if layer_i < len(self.builder_layers) - 1: if self.downsampling_method == "skip": es_list = [ ExpressionSequence(expr_list=fs[::reduce_factor], mask=mask_out) ] elif self.downsampling_method == "concat": es_len = len(es_list[0]) es_list_fwd = [] for i in range(0, es_len, reduce_factor): for j in range(reduce_factor): if i == 0: es_list_fwd.append([]) es_list_fwd[j].append(fs[i + j]) es_list = [ ExpressionSequence(expr_list=es_list_fwd[j], mask=mask_out) for j in range(reduce_factor) ] else: raise RuntimeError( f"unknown downsampling_method {self.downsampling_method}" ) else: # concat final outputs ret_es = ExpressionSequence(expr_list=[f for f in fs], mask=mask_out) self._final_states = [ FinalTransducerState(fb.get_final_states()[0].main_expr(), fb.get_final_states()[0].cell_expr()) for fb in self.builder_layers ] return ret_es
def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence: """ transduce the sequence Args: expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated) Returns: expression sequence """ Wq, Wk, Wv, Wo = [ dy.parameter(x) for x in (self.pWq, self.pWk, self.pWv, self.pWo) ] bq, bk, bv, bo = [ dy.parameter(x) for x in (self.pbq, self.pbk, self.pbv, self.pbo) ] # Start with a [(length, model_size) x batch] tensor x = expr_seq.as_transposed_tensor() x_len = x.dim()[0][0] x_batch = x.dim()[1] # Get the query key and value vectors # TODO: do we need bias broadcasting in DyNet? # q = dy.affine_transform([bq, x, Wq]) # k = dy.affine_transform([bk, x, Wk]) # v = dy.affine_transform([bv, x, Wv]) q = bq + x * Wq k = bk + x * Wk v = bv + x * Wv # Split to batches [(length, head_dim) x batch * num_heads] tensor q, k, v = [ dy.reshape(x, (x_len, self.head_dim), batch_size=x_batch * self.num_heads) for x in (q, k, v) ] # Do scaled dot product [(length, length) x batch * num_heads], rows are queries, columns are keys attn_score = q * dy.transpose(k) / sqrt(self.head_dim) if expr_seq.mask is not None: mask = dy.inputTensor(np.repeat( expr_seq.mask.np_arr, self.num_heads, axis=0).transpose(), batched=True) * -1e10 attn_score = attn_score + mask attn_prob = dy.softmax(attn_score, d=1) # Reduce using attention and resize to match [(length, model_size) x batch] o = dy.reshape(attn_prob * v, (x_len, self.input_dim), batch_size=x_batch) # Final transformation # o = dy.affine_transform([bo, attn_prob * v, Wo]) o = bo + o * Wo expr_seq = ExpressionSequence(expr_transposed_tensor=o, mask=expr_seq.mask) self._final_states = [FinalTransducerState(expr_seq[-1], None)] return expr_seq
def transduce(self, es: ExpressionSequence) -> ExpressionSequence: """ returns the list of output Expressions obtained by adding the given inputs to the current state, one by one, to both the forward and backward RNNs, and concatenating. Args: es: an ExpressionSequence """ es_list = [es] for layer_i, (fb, bb) in enumerate(self.builder_layers): reduce_factor = self._reduce_factor_for_layer(layer_i) if es_list[0].mask is None: mask_out = None else: mask_out = es_list[0].mask.lin_subsampled(reduce_factor) if self.downsampling_method=="concat" and len(es_list[0]) % reduce_factor != 0: raise ValueError(f"For 'concat' subsampling, sequence lengths must be multiples of the total reduce factor, " f"but got sequence length={len(es_list[0])} for reduce_factor={reduce_factor}. " f"Set Batcher's pad_src_to_multiple argument accordingly.") fs = fb.transduce(es_list) bs = bb.transduce([ReversedExpressionSequence(es_item) for es_item in es_list]) if layer_i < len(self.builder_layers) - 1: if self.downsampling_method=="skip": es_list = [ExpressionSequence(expr_list=fs[::reduce_factor], mask=mask_out), ExpressionSequence(expr_list=bs[::reduce_factor][::-1], mask=mask_out)] elif self.downsampling_method=="concat": es_len = len(es_list[0]) es_list_fwd = [] es_list_bwd = [] for i in range(0, es_len, reduce_factor): for j in range(reduce_factor): if i==0: es_list_fwd.append([]) es_list_bwd.append([]) es_list_fwd[j].append(fs[i+j]) es_list_bwd[j].append(bs[len(es_list[0])-reduce_factor+j-i]) es_list = [ExpressionSequence(expr_list=es_list_fwd[j], mask=mask_out) for j in range(reduce_factor)] + \ [ExpressionSequence(expr_list=es_list_bwd[j], mask=mask_out) for j in range(reduce_factor)] else: raise RuntimeError(f"unknown downsampling_method {self.downsampling_method}") else: # concat final outputs ret_es = ExpressionSequence( expr_list=[dy.concatenate([f, b]) for f, b in zip(fs, ReversedExpressionSequence(bs))], mask=mask_out) self._final_states = [FinalTransducerState(dy.concatenate([fb.get_final_states()[0].main_expr(), bb.get_final_states()[0].main_expr()]), dy.concatenate([fb.get_final_states()[0].cell_expr(), bb.get_final_states()[0].cell_expr()])) \ for (fb, bb) in self.builder_layers] return ret_es
def transduce(self, src: ExpressionSequence) -> ExpressionSequence: sent_len = len(src) embeddings = dy.strided_select(dy.parameter(self.embedder), [1,1], [0,0], [self.input_dim, sent_len]) if self.op == 'sum': output = embeddings + src.as_tensor() elif self.op == 'concat': output = dy.concatenate([embeddings, src.as_tensor()]) else: raise ValueError(f'Illegal op {op} in PositionalTransducer (options are "sum"/"concat")') output_seq = ExpressionSequence(expr_tensor=output, mask=src.mask) self._final_states = [FinalTransducerState(output_seq[-1])] return output_seq