def transduce(self, es):
    es_expr = es.as_tensor()  # e.g. es_expr.dim() == ((276, 240), 1)
    sent_len = es_expr.dim()[0][0]
    batch_size = es_expr.dim()[1]

    # convolutions won't work if sent length is too short; pad if necessary
    pad_size = 0
    while math.ceil(float(sent_len + pad_size - self.filter_size_time + 1) / float(self.stride[0])) < self.filter_size_time:
        pad_size += 1
    if pad_size > 0:
        es_expr = dy.concatenate([es_expr,
                                  dy.zeroes((pad_size, self.freq_dim * self.chn_dim),
                                            batch_size=es_expr.dim()[1])])
        sent_len += pad_size

    # convolution layers
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)  # ((276, 80, 3), 1)
    cnn_layer1 = dy.conv2d(es_chn, dy.parameter(self.filters1), stride=self.stride, is_valid=True)      # ((137, 39, 32), 1)
    cnn_layer2 = dy.conv2d(cnn_layer1, dy.parameter(self.filters2), stride=self.stride, is_valid=True)  # ((68, 19, 32), 1)
    cnn_out = dy.reshape(cnn_layer2,
                         (cnn_layer2.dim()[0][0], cnn_layer2.dim()[0][1] * cnn_layer2.dim()[0][2]),
                         batch_size=batch_size)  # ((68, 608), 1)
    es_list = [cnn_out[i] for i in range(cnn_out.dim()[0][0])]

    # RNN layers
    for (fb, bb) in self.builder_layers:
        fs = fb.initial_state().transduce(es_list)
        bs = bb.initial_state().transduce(reversed(es_list))
        es_list = [dy.concatenate([f, b]) for f, b in zip(fs, reversed(bs))]
    return es_list

def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    src = src.as_tensor()
    src_height = src.dim()[0][0]
    src_width = src.dim()[0][1]
    # src_channels = 1
    batch_size = src.dim()[1]

    # convolution and pooling layers
    # src dim is ((40, 1000), 128)
    src = padding(src, self.filter_width[0] + 3)
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1),
                              stride=[self.stride[0], self.stride[0]],
                              is_valid=True))  # ((1, 1000, 64), 128)
    pool1 = dy.maxpooling2d(l1, (1, 4), (1, 2), is_valid=True)  # ((1, 499, 64), 128)

    pool1 = padding(pool1, self.filter_width[1] + 3)
    l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2),
                              stride=[self.stride[1], self.stride[1]],
                              is_valid=True))  # ((1, 499, 512), 128)
    pool2 = dy.maxpooling2d(l2, (1, 4), (1, 2), is_valid=True)  # ((1, 248, 512), 128)

    pool2 = padding(pool2, self.filter_width[2])
    l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3),
                              stride=[self.stride[2], self.stride[2]],
                              is_valid=True))  # ((1, 248, 1024), 128)
    pool3 = dy.max_dim(l3, d=1)

    my_norm = dy.l2_norm(pool3) + 1e-6
    output = dy.cdiv(pool3, my_norm)
    output = dy.reshape(output, (self.num_filters[2],), batch_size=batch_size)
    return ExpressionSequence(expr_tensor=output)

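# Note: the `padding` helper called above is not defined in this snippet. Below is a
# minimal sketch of what it plausibly does, judging from the dim comments: append
# zero columns along the width/time axis so the following `valid` convolution has
# enough input. The name matches the call site, but the second argument is assumed
# to be the number of zero columns to append; this is a guess, not the original code.
import dynet as dy

def padding(x, pad_size):
    dims, batch_size = x.dim()          # e.g. ((H, W), B) or ((H, W, C), B)
    pad_dims = list(dims)
    pad_dims[1] = pad_size              # zeros appended along the width (time) axis
    pad = dy.zeros(tuple(pad_dims), batch_size=batch_size)
    return dy.concatenate([x, pad], d=1)
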
def calc_attention(self, state):
    V = dy.parameter(self.pV)
    U = dy.parameter(self.pU)
    WI = self.WI
    curr_sent_mask = self.curr_sent.mask
    if self.attention_vecs:
        conv_feats = dy.conv2d(self.attention_vecs[-1], self.pL, stride=[1, 1], is_valid=False)
        conv_feats = dy.transpose(dy.reshape(conv_feats,
                                             (conv_feats.dim()[0][0], self.hidden_dim),
                                             batch_size=conv_feats.dim()[1]))
        h = dy.tanh(dy.colwise_add(WI + conv_feats, V * state))
    else:
        h = dy.tanh(dy.colwise_add(WI, V * state))
    scores = dy.transpose(U * h)
    if curr_sent_mask is not None:
        scores = curr_sent_mask.add_to_tensor_expr(scores, multiplicator=-100.0)
    normalized = dy.softmax(scores)
    self.attention_vecs.append(normalized)
    return normalized

def apply(self, x_input):
    # gated-convolution variant, kept for reference:
    #   output_s = dy.conv2d_bias(x_input, self.kernel_s.expr(), self.bias_s.expr(),
    #                             (self.s_x, self.s_y), is_valid=self.is_valid)
    #   output_t = dy.conv2d_bias(x_input, self.kernel_t.expr(), self.bias_t.expr(),
    #                             (self.s_x, self.s_y), is_valid=self.is_valid)
    #   return dy.cmult(dy.tanh(output_t), dy.logistic(output_s))
    output = dy.conv2d(x_input, self.kernel.expr(update=True), (self.s_x, self.s_y),
                       is_valid=self.is_valid)
    return dy.rectify(output)

def transduce(self, es: expression_seqs.ExpressionSequence) -> expression_seqs.ExpressionSequence:
    mask = es.mask
    sent_len = len(es)
    es_expr = es.as_transposed_tensor()
    batch_size = es_expr.dim()[1]
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)

    h_out = {}
    for direction in ["fwd", "bwd"]:
        # input convolutions
        gates_xt_bias = dy.conv2d_bias(es_chn,
                                       dy.parameter(self.params["x2all_" + direction]),
                                       dy.parameter(self.params["b_" + direction]),
                                       stride=(1, 1), is_valid=False)
        gates_xt_bias_list = [dy.pick_range(gates_xt_bias, i, i + 1) for i in range(sent_len)]

        h = []
        c = []
        for input_pos in range(sent_len):
            directional_pos = input_pos if direction == "fwd" else sent_len - input_pos - 1
            gates_t = gates_xt_bias_list[directional_pos]
            if input_pos > 0:
                # recurrent convolutions
                gates_h_t = dy.conv2d(h[-1], dy.parameter(self.params["h2all_" + direction]),
                                      stride=(1, 1), is_valid=False)
                gates_t += gates_h_t

            # standard LSTM logic
            if len(c) == 0:
                c_tm1 = dy.zeros((self.freq_dim * self.num_filters,), batch_size=batch_size)
            else:
                c_tm1 = c[-1]
            gates_t_reshaped = dy.reshape(gates_t, (4 * self.freq_dim * self.num_filters,),
                                          batch_size=batch_size)
            c_t = dy.reshape(dy.vanilla_lstm_c(c_tm1, gates_t_reshaped),
                             (self.freq_dim * self.num_filters,), batch_size=batch_size)
            h_t = dy.vanilla_lstm_h(c_t, gates_t_reshaped)
            h_t = dy.reshape(h_t, (1, self.freq_dim, self.num_filters,), batch_size=batch_size)

            if mask is None or np.isclose(np.sum(mask.np_arr[:, input_pos:input_pos + 1]), 0.0):
                c.append(c_t)
                h.append(h_t)
            else:
                c.append(mask.cmult_by_timestep_expr(c_t, input_pos, True)
                         + mask.cmult_by_timestep_expr(c[-1], input_pos, False))
                h.append(mask.cmult_by_timestep_expr(h_t, input_pos, True)
                         + mask.cmult_by_timestep_expr(h[-1], input_pos, False))
        h_out[direction] = h

    ret_expr = []
    for state_i in range(len(h_out["fwd"])):
        state_fwd = h_out["fwd"][state_i]
        state_bwd = h_out["bwd"][-1 - state_i]
        output_dim = (state_fwd.dim()[0][1] * state_fwd.dim()[0][2],)
        fwd_reshape = dy.reshape(state_fwd, output_dim, batch_size=batch_size)
        bwd_reshape = dy.reshape(state_bwd, output_dim, batch_size=batch_size)
        ret_expr.append(dy.concatenate([fwd_reshape, bwd_reshape],
                                       d=0 if self.reshape_output else 2))
    return expression_seqs.ExpressionSequence(expr_list=ret_expr, mask=mask)

# TODO: implement get_final_states()

def __call__(self, src):
    src = src.as_tensor()

    # convolutional layer
    src = padding(src, src.dim()[0][0], src.dim()[0][1], self.filter_width, self.stride,
                  src.dim()[1])
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filter_conv),
                              stride=[self.stride, self.stride], is_valid=True))
    timestep = l1.dim()[0][1]
    features = l1.dim()[0][2]
    batch_size = l1.dim()[1]
    # transpose l1 to be (timestep, dim), but keep the batch_size
    rhn_in = dy.reshape(l1, (timestep, features), batch_size=batch_size)
    rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]

    for l in range(self.rhn_num_hidden_layers):
        rhn_out = []
        # initialize the first state vector from a learned parameter, broadcast across the batch
        prev_state = dy.parameter(self.init[l])
        # recurrent highway network
        for t in range(timestep):
            for m in range(0, self.rhn_microsteps):
                H = dy.affine_transform([dy.parameter(self.recur[l][m][1]),
                                         dy.parameter(self.recur[l][m][0]),
                                         prev_state])
                T = dy.affine_transform([dy.parameter(self.recur[l][m][3]),
                                         dy.parameter(self.recur[l][m][2]),
                                         prev_state])
                if m == 0:
                    H += dy.parameter(self.linear[l][0]) * rhn_in[t]
                    T += dy.parameter(self.linear[l][1]) * rhn_in[t]
                H = dy.tanh(H)
                T = dy.logistic(T)
                prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(T, H)  # ((1024,), batch_size)
            rhn_out.append(prev_state)
        if self.residual and l > 0:
            rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
        rhn_in = rhn_out

    # compute the attention-weighted average of the activations
    rhn_in = dy.concatenate_cols(rhn_in)
    scores = dy.transpose(dy.parameter(self.attention[0][1])) * dy.tanh(
        dy.parameter(self.attention[0][0]) * rhn_in)  # ((1, 510), batch_size)
    scores = dy.reshape(scores, (scores.dim()[0][1],), batch_size=scores.dim()[1])
    # rhn_in is ((1024, 510), batch_size); softmax(scores) is ((510,), batch_size)
    attn_out = rhn_in * dy.softmax(scores)
    return ExpressionSequence(expr_tensor=attn_out)

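# Note: this snippet's `padding` (not shown) takes the source dims explicitly, unlike
# the two-argument variant sketched earlier. A plausible sketch, assuming it zero-pads
# the width (time) axis so that a strided `valid` convolution covers every frame,
# i.e. so (width - filter_width) becomes a multiple of stride; hypothetical, not the
# original helper:
import dynet as dy

def padding(src, src_height, src_width, filter_width, stride, batch_size):
    pad_size = (stride - (src_width - filter_width) % stride) % stride
    if pad_size == 0:
        return src
    pad = dy.zeros((src_height, pad_size), batch_size=batch_size)
    return dy.concatenate([src, pad], d=1)
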
def transduce(self, src):
    src = src.as_tensor()
    src_height = src.dim()[0][0]
    src_width = src.dim()[0][1]
    src_channels = 1
    batch_size = src.dim()[1]
    src = dy.reshape(src, (src_height, src_width, src_channels),
                     batch_size=batch_size)  # ((src_height, src_width, 1), batch_size)

    # convolution and pooling layers
    l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1),
                              stride=[self.stride[0], self.stride[0]], is_valid=True))
    pool1 = dy.maxpooling2d(l1, (1, 4), (1, 2), is_valid=True)
    l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2),
                              stride=[self.stride[1], self.stride[1]], is_valid=True))
    pool2 = dy.maxpooling2d(l2, (1, 4), (1, 2), is_valid=True)
    l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3),
                              stride=[self.stride[2], self.stride[2]], is_valid=True))
    pool3 = dy.kmax_pooling(l3, 1, d=1)

    # L2-normalize, then drop the singleton dims
    output = dy.cdiv(pool3, dy.sqrt(dy.squared_norm(pool3)))
    output = dy.reshape(output, (self.num_filters[2],), batch_size=batch_size)
    return ExpressionSequence(expr_tensor=output)

def run_classifier(self, common_top_recur, word_inputs, domain_flag):
    batch_size = word_inputs.shape[1]
    seq_len = word_inputs.shape[0]
    cnn_filter = []
    for filt in self.filter:
        cnn_filter.append(dy.parameter(filt))
    cnn_W = dy.parameter(self.class_W)
    cnn_input = dy.reshape(common_top_recur, (1, seq_len, 2 * self.lstm_hiddens), batch_size)
    cnn_out_list = []
    for i in range(len(cnn_filter)):
        cnn_out = dy.conv2d(cnn_input, cnn_filter[i], [1, 1], is_valid=False)  # len * batch * filter_num
        pool_out = dy.max_dim(cnn_out, d=1)
        pool_out = dy.reshape(pool_out, (self.filter_size,), batch_size)
        pool_out = dy.rectify(pool_out)
        cnn_out_list.append(pool_out)
    final_out = dy.concatenate(cnn_out_list)
    result = cnn_W * final_out

    predict = np.argmax(result.npvalue(), axis=0)
    cor = 0.
    for pre in predict:
        if int(pre) == domain_flag:
            cor += 1
    class_accurate = cor / batch_size

    target = [domain_flag] * batch_size  # e.g. [0, 0, 0, 0]
    classes_loss = dy.pickneglogsoftmax_batch(result, target)
    class_loss = dy.sum_batches(classes_loss) / batch_size
    return class_loss, class_accurate

def __call__(self, sentence, c2i, maxn_char, act, train=False):
    words_batch = []
    for token in sentence:
        chars_emb = [self.clookup[int(c2i.get(c, 0))] for c in token.chars]
        c2w = dy.concatenate_cols(chars_emb)
        c2w = dy.reshape(c2w, tuple(list(c2w.dim()[0]) + [1]))
        words_batch.append(c2w)
    words_batch = dy.concatenate_to_batch(words_batch)

    convds = [dy.conv2d(words_batch, W, stride=(1, 1), is_valid=True) for W in self.Ws]
    actds = [act(convd) for convd in convds]
    poolds = [dy.maxpooling2d(actd, ksize=(1, maxn_char - win_size + 1), stride=(1, 1))
              for win_size, actd in zip(self.win_sizes, actds)]
    words_batch = [dy.reshape(poold, (poold.dim()[0][2],)) for poold in poolds]
    words_batch = dy.concatenate(words_batch)

    c2w_emb = []
    for idx, token in enumerate(sentence):
        c2w_emb.append(dy.pick_batch_elem(words_batch, idx))
    return c2w_emb

def __call__(self, es):
    es_expr = es.as_tensor()
    sent_len = es_expr.dim()[0][0]
    batch_size = es_expr.dim()[1]

    # convolutions won't work if sentence length is too short; pad if necessary
    pad_size = 0
    while self.get_output_len(sent_len + pad_size) < self.filter_size_time:
        pad_size += 1
    if pad_size > 0:
        es_expr = dy.concatenate([es_expr,
                                  dy.zeroes((pad_size, self.freq_dim * self.chn_dim),
                                            batch_size=es_expr.dim()[1])])
        sent_len += pad_size

    if es_expr.dim() == ((sent_len, self.freq_dim, self.chn_dim), batch_size):
        es_chn = es_expr
    else:
        es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)
    cnn_layer = es_chn

    # loop over layers
    for layer_i in range(len(self.filters_layers)):
        cnn_layer_prev = cnn_layer
        filters = self.filters_layers[layer_i]
        # convolution
        cnn_layer = dy.conv2d(cnn_layer, dy.parameter(filters), stride=(1, 1), is_valid=True)
        # non-linearity
        if self.nonlinearity == "rectify":
            cnn_layer = dy.rectify(cnn_layer)
        elif self.nonlinearity == "silu":
            cnn_layer = dy.silu(cnn_layer)
        elif self.nonlinearity is not None:
            raise RuntimeError("unknown nonlinearity: %s" % self.nonlinearity)
        # max pooling
        if self.pooling[layer_i]:
            cnn_layer = dy.maxpooling2d(cnn_layer, (3, 3), stride=self.pooling[layer_i], is_valid=True)

    mask_out = es.mask.lin_subsampled(trg_len=cnn_layer.dim()[0][0])
    if self.output_tensor:
        return expression_seqs.ExpressionSequence(expr_tensor=cnn_layer, mask=mask_out)
    else:
        cnn_out = dy.reshape(cnn_layer,
                             (cnn_layer.dim()[0][0], cnn_layer.dim()[0][1] * cnn_layer.dim()[0][2]),
                             batch_size=batch_size)
        es_list = [cnn_out[i] for i in range(cnn_out.dim()[0][0])]
        return expression_seqs.ExpressionSequence(expr_list=es_list, mask=mask_out)

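# Note: `get_output_len` is not defined in this snippet. A minimal single-layer
# method sketch that is consistent with the inline padding check in the first
# transduce() above, where a `valid` convolution of time-filter size F and stride S
# maps a length L to ceil((L - F + 1) / S); an assumption, not the original helper:
import math

def get_output_len(self, input_len):
    return int(math.ceil(float(input_len - self.filter_size_time + 1) / float(self.stride[0])))
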
def __call__(self, es):
    es_expr = es.as_tensor()
    sent_len = es_expr.dim()[0][0]
    batch_size = es_expr.dim()[1]

    # convolutions won't work if sentence length is too short; pad if necessary
    pad_size = 0
    while self.get_output_len(sent_len + pad_size) < self.filter_size_time:
        pad_size += 1
    es_expr = self.pad(es_expr, pad_size)
    sent_len += pad_size

    if es_expr.dim() == ((sent_len, self.freq_dim, self.chn_dim), batch_size):
        es_chn = es_expr
    else:
        es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)
    cnn_layer = es_chn

    # loop over layers
    mask_out = None
    for layer_i in range(len(self.filters_layers)):
        cnn_filter = self.weight_noise(self.filters_layers[layer_i], self.train)
        if not self.pre_activation:
            cnn_layer = dy.conv2d(cnn_layer, cnn_filter,
                                  stride=self.get_stride_for_layer(layer_i), is_valid=True)
        if self.use_bn:
            mask_out = None if es.mask is None else es.mask.lin_subsampled(trg_len=cnn_layer.dim()[0][0])
            cnn_layer = self.bn_layers[layer_i](cnn_layer, train=self.train, mask=mask_out)
        cnn_layer = self.apply_nonlinearity(self.nonlinearity, cnn_layer)
        self.last_output.append(cnn_layer)
        if self.pre_activation:
            cnn_layer = dy.conv2d(cnn_layer, cnn_filter,
                                  stride=self.get_stride_for_layer(layer_i), is_valid=True)

    mask_out = None if es.mask is None else es.mask.lin_subsampled(trg_len=cnn_layer.dim()[0][0])
    if self.output_transposed_tensor:
        return expression_seqs.ExpressionSequence(expr_transposed_tensor=cnn_layer, mask=mask_out)
    else:
        cnn_out = dy.reshape(cnn_layer,
                             (cnn_layer.dim()[0][0], cnn_layer.dim()[0][1] * cnn_layer.dim()[0][2]),
                             batch_size=batch_size)
        es_list = [cnn_out[i] for i in range(cnn_out.dim()[0][0])]
        return expression_seqs.ExpressionSequence(expr_list=es_list, mask=mask_out)

def get_vecs(self, node):
    # word -> input vector of LSTM
    need_word_fallback = hasattr(self.options, "is_train") and self.options.is_train and \
                         hasattr(self.options, "word_fallback") and \
                         self.options.word_fallback > 0 and \
                         self.random.random() < self.options.word_fallback
    if not node.norm:
        # empty string
        word_vec = self.word_embedding("*EMPTY*")
    elif self.options.cembedding_dims != 0 and self.options.word_threshold > 1 \
            and (node.norm not in self.freq_words or need_word_fallback):
        # use character vector
        char_vecs = [self.char_embedding(i) for i in node.norm]
        if getattr(self.options, "cembedding_type", "rnn") == "rnn":
            char_vecs_o = self.c_lstm(char_vecs)
            word_vec = (char_vecs_o[0] + char_vecs_o[-1]) / 2
        else:
            pad_size = max(self.options.cembedding_filters) - 1
            zero = dn.zeros((self.options.cembedding_dims,))
            char_vecs = [zero] * pad_size + char_vecs + [zero] * pad_size
            pooled_vectors = []
            conv_input = dn.transpose(dn.concatenate(char_vecs, 1))
            conv_input_stacked = dn.reshape(conv_input, conv_input.dim()[0] + (1,))
            # integer division: the reshape below needs an int dim
            cembedding_filter_count = self.options.wembedding_dims // len(self.options.cembedding_filters)
            for filter_size, conv_W in zip(self.options.cembedding_filters, self.c_conv_W.components):
                conv_W_expr = conv_W.expr()
                conved = dn.conv2d(conv_input_stacked, conv_W_expr, [1, 1])
                conved = dn.rectify(conved)
                conved_dim = len(char_vecs) - filter_size + 1
                pooled = dn.maxpooling2d(conved, (conved_dim, 1), (1, 1))
                pooled_vectors.append(dn.reshape(pooled, (cembedding_filter_count,)))
            word_vec = dn.concatenate(pooled_vectors)
    else:
        # use word vector
        word_vec = self.word_embedding(node.norm)

    vecs = [word_vec]
    if self.options.pembedding_dims > 0:
        postag_dropout = getattr(self, "postag_dropout", 0.0)
        pos_vec = self.pos_embedding(node.postag)
        if self.options.is_train and postag_dropout > 0:
            pos_vec = dn.block_dropout(pos_vec, postag_dropout)
        vecs.append(pos_vec)
    if self.options.supertag_embedding > 0:
        supertag_dropout = getattr(self, "supertag_dropout", 0.0)
        supertag_vec = self.supertag_embedding(node.supertag)
        if self.options.is_train and supertag_dropout > 0:
            supertag_vec = dn.block_dropout(supertag_vec, supertag_dropout)
        vecs.append(supertag_vec)
    if self.ext_embedding is not None:
        ext_vec = self.ext_embedding(node.form, (node.norm,),
                                     const=getattr(self.options, "static_ext_embedding", False))
        vecs.append(ext_vec)
    return dn.concatenate(vecs)