Пример #1
0
    def __call__(self, htA, HO, transform_flag=True):
        """
        Score every element of ``HO`` against ``htA`` and build a weighted summary.

        :param htA: vector concatenated with each (optionally transformed) element of ``HO``
        :param HO: indexable sequence of vectors; ``len(HO)`` gives the number of steps
        :param transform_flag: when true, each element gets a residual ReLU term
            ``rectify(W_A * htA + W_O * h + b)`` added before it is scored
        :return: tuple ``(summary, weights)`` where ``summary`` is reshaped to
            ``(2 * self.dim_opi,)`` and ``weights`` is the numpy value of the softmax
        """
        n_steps = len(HO)
        transformed = []
        raw_scores = []
        for idx in range(n_steps):
            state = HO[idx]
            if transform_flag:
                gate = dy.rectify(self.W_A * htA + self.W_O * state + self.b)
                state = state + gate
            joint = dy.concatenate([htA, state])
            raw_scores.append(dy.tanh(dy.dot_product(self.W_concat, joint)))
            transformed.append(state)

        # Stack the per-step vectors as rows so one matrix product yields the summary.
        row_shape = (1, 2 * self.dim_opi)
        stacked = dy.concatenate([dy.reshape(vec, d=row_shape) for vec in transformed])

        # One normalised weight per step.
        attention = dy.softmax(dy.concatenate(raw_scores))
        attention_np = attention.npvalue()

        summary = dy.reshape(attention, (1, n_steps)) * stacked
        return dy.reshape(summary, (2 * self.dim_opi,)), attention_np
Пример #2
0
    def transduce(self, embed_sent):
        """
        Run the zero-padded convolution stack over an embedded sentence.

        :param embed_sent: object exposing ``as_tensor()``; the tensor's second
            dimension is read as the sentence length
        :return: an ``ExpressionSequence`` wrapping the output of the last
            convolution; its final element is also stored in ``self._final_states``
        """
        src = embed_sent.as_tensor()

        sent_len = src.dim()[0][1]
        batch_size = src.dim()[1]
        # Floor division: the pad width is used as a tensor dimension below, so it
        # must be an int (true division would produce a float on Python 3).
        pad_size = (self.window_receptor - 1) // 2  # TODO adapt it also for even window size

        def activate(expr):
            # Compare by value; identity checks (`is`) against string literals only
            # succeed by accident of interning.
            if self.non_linearity == 'tanh':
                return dy.tanh(expr)
            if self.non_linearity == 'relu':
                return dy.rectify(expr)
            if self.non_linearity == 'sigmoid':
                return dy.logistic(expr)
            # 'linear' (and any unrecognised value, as before) leaves the input as is.
            return expr

        # Pad both ends along dimension 1 with zero columns.
        src = dy.concatenate([
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size), src,
            dy.zeroes((self.input_dim, pad_size), batch_size=batch_size)
        ],
                             d=1)
        padded_sent_len = sent_len + 2 * pad_size

        conv1 = dy.parameter(self.pConv1)
        bias1 = dy.parameter(self.pBias1)
        src_chn = dy.reshape(src, (self.input_dim, padded_sent_len, 1),
                             batch_size=batch_size)
        cnn_layer1 = dy.conv2d_bias(src_chn, conv1, bias1, stride=[1, 1])

        hidden_layer = dy.reshape(cnn_layer1, (self.internal_dim, sent_len, 1),
                                  batch_size=batch_size)
        hidden_layer = activate(hidden_layer)

        for conv_hid, bias_hid in self.builder_layers:
            hidden_layer = dy.conv2d_bias(hidden_layer,
                                          dy.parameter(conv_hid),
                                          dy.parameter(bias_hid),
                                          stride=[1, 1])
            hidden_layer = dy.reshape(hidden_layer,
                                      (self.internal_dim, sent_len, 1),
                                      batch_size=batch_size)
            hidden_layer = activate(hidden_layer)

        last_conv = dy.parameter(self.last_conv)
        last_bias = dy.parameter(self.last_bias)
        output = dy.conv2d_bias(hidden_layer,
                                last_conv,
                                last_bias,
                                stride=[1, 1])
        output = dy.reshape(output, (sent_len, self.output_dim),
                            batch_size=batch_size)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Пример #3
0
  def calc_loss(self, src, db_idx, src_mask=None, trg_mask=None):
    """
    Compute a hinge loss between encoded sources and the database targets at ``db_idx``.

    :param src: source batch, embedded and encoded by the source-side components
    :param db_idx: indices into ``self.database``; its length sets the number of gold ids
    :param src_mask: mask forwarded to the source embedder
    :param trg_mask: overwritten by the mask stored in the database entry
    :return: the loss expression
    :raises RuntimeError: if ``self.loss_direction`` is neither "forward" nor "bidirectional"
    """
    embedded_src = self.src_embedder.embed_sent(src, mask=src_mask)
    self.src_encoder.set_input(src)
    src_encodings = self.exprseq_pooling(self.src_encoder.transduce(embedded_src))

    # The mask that comes with the database entry replaces the argument.
    trg_batch, trg_mask = self.database[db_idx]
    trg_encodings = self.encode_trg_example(trg_batch, mask=trg_mask)

    # Fold the batch dimension of the targets into a second matrix dimension.
    trg_shape, trg_batch_size = trg_encodings.dim()
    trg_matrix = dy.reshape(trg_encodings, (trg_shape[0], trg_batch_size))

    scores = dy.transpose(src_encodings) * trg_matrix

    n_items = len(db_idx)
    gold_ids = list(range(n_items))

    if self.loss_direction == "forward":
      return dy.sum_batches(dy.hinge_batch(dy.transpose(scores), gold_ids))

    if self.loss_direction == "bidirectional":
      square = dy.reshape(scores, (n_items, n_items))
      both_ways = dy.hinge_dim(square, gold_ids, d=0) + dy.hinge_dim(square, gold_ids, d=1)
      return dy.sum_elems(both_ways)

    raise RuntimeError("Illegal loss direction {}".format(self.loss_direction))
Пример #4
0
    def decode_loss(self, src_encodings, tgt_seqs):
        """
        Compute the batch-averaged arc + label loss.

        NOTE: sentences should start with an empty token (the root of the dependency tree).

        :param src_encodings: encodings passed on to ``self.cal_scores``
        :param tgt_seqs: (tgt_heads, tgt_labels): list (length=batch_size) of (src_len)
        :return: summed arc and label loss divided by the batch size
        """
        gold_heads, gold_labels = tgt_seqs

        src_len = len(gold_heads[0])
        batch_size = len(gold_heads)
        folded_batch = src_len * batch_size

        # Flattened gold annotations, one entry per (sentence, position).
        flat_heads = np.array(gold_heads).flatten()
        flat_labels = np.array(gold_labels).flatten()

        s_arc, s_label = self.cal_scores(src_encodings)

        # Highest-scoring entry along axis 0 of the arc scores, flattened.
        predicted_heads = np.argmax(s_arc.npvalue(), axis=0).transpose().flatten()

        # For each label, pick the score at the predicted head.
        picked = []
        for label_score in s_label:
            folded = dy.reshape(label_score, (src_len,), batch_size=folded_batch)
            picked.append(dy.pick_batch(folded, predicted_heads))
        label_scores = dy.concatenate(picked, d=0)

        folded_arc = dy.reshape(s_arc, (src_len,), batch_size=folded_batch)
        arc_loss = dy.pickneglogsoftmax_batch(folded_arc, flat_heads)
        label_loss = dy.pickneglogsoftmax_batch(label_scores, flat_labels)

        return dy.sum_batches(arc_loss + label_loss) / batch_size
Пример #5
0
def flatten_triple(action_scores, location_scores, argument_scores):
    """Combine three score vectors into one column holding the sum for every triple.

    The result has ``num_actions * num_locations * num_arguments`` rows and one
    column; each entry is the sum of one action, one location and one argument score.
    """
    n_act = action_scores.dim()[0][0]
    n_loc = location_scores.dim()[0][0]
    n_arg = argument_scores.dim()[0][0]

    # Broadcast via outer products with ones: both matrices are (n_arg, n_loc).
    arg_grid = dy.reshape(argument_scores, (n_arg, 1)) * dy.ones((1, n_loc))
    loc_grid = dy.ones((n_arg, 1)) * dy.reshape(location_scores, (1, n_loc))
    pair_scores = loc_grid + arg_grid

    # Flatten the pairs into a column and broadcast across actions.
    pair_column = dy.reshape(pair_scores, (n_loc * n_arg, 1))
    pair_grid = pair_column * dy.ones((1, n_act))
    act_grid = dy.ones((n_arg * n_loc, 1)) * dy.reshape(action_scores, (1, n_act))

    triple_scores = pair_grid + act_grid
    return dy.reshape(triple_scores, (n_act * n_loc * n_arg, 1))
Пример #6
0
  def transduce(self, es):
    """
    Apply two strided convolutions followed by the bidirectional RNN layers.

    :param es: object exposing ``as_tensor()``; the first tensor dimension is the sentence length
    :return: list of per-step expressions, each the concatenation of the forward
        and backward output of the last RNN layer
    """
    es_expr = es.as_tensor()

    # e.g. es_expr.dim() == ((276, 240), 1)
    dims = es_expr.dim()
    sent_len = dims[0][0]
    batch_size = dims[1]

    def needs_more_padding(extra):
      # convolutions won't work if the sentence is too short
      available = float(sent_len + extra - self.filter_size_time + 1)
      return math.ceil(available / float(self.stride[0])) < self.filter_size_time

    pad_size = 0
    while needs_more_padding(pad_size):
      pad_size += 1
    if pad_size > 0:
      zero_rows = dy.zeroes((pad_size, self.freq_dim * self.chn_dim), batch_size=batch_size)
      es_expr = dy.concatenate([es_expr, zero_rows])
      sent_len += pad_size

    # convolution layers
    es_chn = dy.reshape(es_expr, (sent_len, self.freq_dim, self.chn_dim), batch_size=batch_size)  # ((276, 80, 3), 1)
    cnn_layer1 = dy.conv2d(es_chn, dy.parameter(self.filters1), stride=self.stride, is_valid=True)  # ((137, 39, 32), 1)
    cnn_layer2 = dy.conv2d(cnn_layer1, dy.parameter(self.filters2), stride=self.stride, is_valid=True)  # ((68, 19, 32), 1)
    conv_shape = cnn_layer2.dim()[0]
    # merge the last two dimensions so each row is one time step
    cnn_out = dy.reshape(cnn_layer2, (conv_shape[0], conv_shape[1] * conv_shape[2]), batch_size=batch_size)  # ((68, 608), 1)
    es_list = [cnn_out[step] for step in range(conv_shape[0])]

    # RNN layers
    for forward_builder, backward_builder in self.builder_layers:
      forward_out = forward_builder.initial_state().transduce(es_list)
      backward_out = backward_builder.initial_state().transduce(reversed(es_list))
      es_list = [dy.concatenate([fwd, bwd]) for fwd, bwd in zip(forward_out, reversed(backward_out))]
    return es_list
    def stitch(self, layer_predictions):
        """
        Produce a beta-weighted linear combination of the given layer predictions.

        :param layer_predictions: a sequence of length ``num_layers`` of predicted
                                  states; they are concatenated column-wise and
                                  reshaped to ``(num_layers, hidden_dim)``
        :return: a single expression of shape ``(hidden_dim,)``
        """
        assert len(layer_predictions) == self.num_layers

        stacked_states = dynet.reshape(
            dynet.concatenate_cols(list(layer_predictions)),
            (self.num_layers, self.hidden_dim))

        # With more than one layer the betas are transposed before the product.
        betas = dynet.parameter(self.betas)
        if self.num_layers > 1:
            betas = dynet.transpose(betas)

        return dynet.reshape(betas * stacked_states, (self.hidden_dim, ))
Пример #8
0
def bilinear(x,
             W,
             y,
             input_size,
             seq_len,
             num_outputs=1,
             bias_x=False,
             bias_y=False):
    """Compute the bilinear form ``y^T (W x)``, optionally with bias rows appended.

    Per the original annotations, ``x`` and ``y`` are (input_size x seq_len)
    and ``W`` is (num_outputs x ny) x nx.

    Returns an expression that is seq_len_y x seq_len_x when ``num_outputs == 1``
    and seq_len_y x num_outputs x seq_len_x otherwise.
    """

    def append_ones_row(mat):
        ones_row = dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))
        return dy.concatenate([mat, ones_row])

    if bias_x:
        x = append_ones_row(x)
    if bias_y:
        y = append_ones_row(y)

    # row count of y after the optional bias row
    rows_y = input_size + bias_y
    multi_output = num_outputs > 1

    projected = W * x
    if multi_output:
        projected = dy.reshape(projected, (rows_y, num_outputs * seq_len))

    scores = dy.transpose(y) * projected
    if multi_output:
        scores = dy.reshape(scores, (seq_len, num_outputs, seq_len))
    return scores
Пример #9
0
    def stitch(self, layer_predictions):
        """
        Linearly combine the per-layer predicted states at every time step.

        :param layer_predictions: a list of length num_layers containing lists
                                  of length seq_len of predicted states for
                                  each layer
        :return: a list with one ``(hidden_dim,)`` expression per time step,
                 each the beta-weighted combination of that step's layer states
        """
        assert len(layer_predictions) == self.num_layers

        def combine(states_at_step):
            # stack the layer states into a (num_layers, hidden_dim) matrix
            stacked = dynet.reshape(
                dynet.concatenate_cols(list(states_at_step)),
                (self.num_layers, self.hidden_dim))
            # (1, num_layers) betas times the matrix gives (1, hidden_dim)
            weighted = dynet.transpose(dynet.parameter(self.betas)) * stacked
            return dynet.reshape(weighted, (self.hidden_dim, ))

        # zip(*...) walks the layers in lock-step, one tuple per time step
        return [combine(step_states) for step_states in zip(*layer_predictions)]
Пример #10
0
  def transform(self, input_expr: dy.Expression, mask: Optional[batchers.Mask]=None):
    """
    Apply batch norm.

    When ``self.train`` is set, mean and standard deviation are computed from
    ``input_expr`` and blended into the running population statistics; otherwise
    the stored population statistics are used for normalization.

    Args:
      input_expr: input
      mask: compute statistics only over unmasked parts of the input expression

    Returns:
      ``gamma * xhat + beta``; asserted to have the same dimensions as ``input_expr``.
    """
    dim_in = input_expr.dim()
    param_bn_gamma = dy.parameter(self.gamma)
    param_bn_beta = dy.parameter(self.beta)
    if self.train:
      # presumably 0 makes the statistics ops count every entry -- TODO confirm against the DyNet docs
      num_unmasked = 0
      if mask is not None:
        input_expr = set_masked_to_mean(mask, input_expr, self.time_first)
        # number of zero entries in the mask, scaled by the broadcast factor
        num_unmasked = (mask.np_arr.size - np.count_nonzero(mask.np_arr)) * broadcast_factor(mask, input_expr)
      bn_mean = dy.moment_dim(input_expr, self.get_stat_dimensions(), 1, True, num_unmasked)
      # NOTE(review): the value is negated both inside and outside the reshape, so this
      # evaluates to +mean here, while the else-branch below yields -mean -- confirm intent.
      neg_bn_mean_reshaped = -dy.reshape(-bn_mean, self.get_normalizer_dimensionality())
      # x += -m*x + m*new  is  x = (1-m)*x + m*new, i.e. a moving average with momentum m
      self.population_running_mean += (-BN_MOMENTUM) * self.population_running_mean + BN_MOMENTUM * bn_mean.npvalue()
      bn_std = dy.std_dim(input_expr, self.get_stat_dimensions(), True, num_unmasked)
      self.population_running_std += (-BN_MOMENTUM) * self.population_running_std + BN_MOMENTUM * bn_std.npvalue()
    else:
      # use the stored population statistics instead of the current input's
      neg_bn_mean_reshaped = -dy.reshape(dy.inputVector(self.population_running_mean), self.get_normalizer_dimensionality())
      bn_std = dy.inputVector(self.population_running_std)
    bn_numerator = input_expr + neg_bn_mean_reshaped
    # BN_EPS is added to the denominator before the element-wise division
    bn_xhat = dy.cdiv(bn_numerator, dy.reshape(bn_std, self.get_normalizer_dimensionality()) + BN_EPS)
    bn_y = dy.cmult(param_bn_gamma, bn_xhat) + param_bn_beta # y = gamma * xhat + beta
    dim_out = bn_y.dim()
    # hand the (possibly updated) running statistics to save_processed_arg on every call
    self.save_processed_arg("population_running_mean", self.population_running_mean)
    self.save_processed_arg("population_running_std", self.population_running_std)
    assert dim_out == dim_in
    return bn_y
Пример #11
0
def bilinear(x,
             W,
             y,
             input_size,
             seq_len,
             batch_size,
             num_outputs=1,
             bias_x=False,
             bias_y=False):
    """Batched bilinear form ``y^T (W x)``, optionally with bias rows appended.

    Adopted from:
    https://github.com/jcyk/Dynet-Biaffine-dependency-parser/blob/master/lib/utils.py

    Per the original annotations, ``x`` and ``y`` are (input_size x seq_len) x batch_size
    and ``W`` is (num_outputs x ny) x nx.

    Returns an expression that is seq_len_y x seq_len_x when ``num_outputs == 1``
    and seq_len_y x num_outputs x seq_len_x otherwise.
    """

    def append_ones_row(mat):
        ones_row = dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))
        return dy.concatenate([mat, ones_row])

    if bias_x:
        x = append_ones_row(x)
    if bias_y:
        y = append_ones_row(y)

    # row count of y after the optional bias row
    rows_y = input_size + bias_y
    multi_output = num_outputs > 1

    projected = W * x
    if multi_output:
        projected = dy.reshape(projected, (rows_y, num_outputs * seq_len),
                               batch_size=batch_size)

    scores = dy.transpose(y) * projected
    if multi_output:
        scores = dy.reshape(scores, (seq_len, num_outputs, seq_len),
                            batch_size=batch_size)
    return scores
Пример #12
0
 def recurrence(self, xt, hmtm1, cmtm1, h_tilde_tm1, dropout_flag):
     """
     recurrence function of LSTM with truncated self-attention

     Attention scores over the stored hidden states are used to summarise both the
     hidden memory and the cell memory; the summaries then take the place of the
     previous hidden/cell state in the gate equations.

     :param xt: current input, shape: (n_in)
     :param hmtm1: hidden memory [htm1, ..., h1], shape: (n_steps, n_out)
     :param cmtm1: cell memory: (n_steps, n_out)
     :param h_tilde_tm1: previous hidden summary, shape: (n_out, )
     :param dropout_flag: whether to perform partial dropout (applied to ``W * xt`` only)
     :return: tuple ``(hmt, cmt, h_tilde_t)``: updated hidden memory, updated cell
         memory and the current hidden summary
     """
     # one unnormalised score per stored step
     score = dy.concatenate([dy.dot_product(self.u, dy.tanh(\
         self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_tilde_tm1)) for i in range(self.n_steps)])
     # normalize the attention score
     score = dy.softmax(score)
     # shape: (1, n_out)
     h_tilde_t = dy.reshape(dy.transpose(score) * hmtm1, d=(self.n_out,))
     # the same weights summarise the cell memory
     c_tilde_t = dy.transpose(score) * cmtm1
     Wx = self.W * xt
     if dropout_flag:
         # perform partial dropout over the lstm
         Wx = dy.dropout(Wx, self.dropout_rate)
     Uh = self.U * h_tilde_t
     # shape: (4*n_out)
     sum_item = Wx + Uh + self.b
     # the four consecutive n_out-sized slices feed it, ft, ot and c_hat respectively
     it = dy.logistic(sum_item[:self.n_out])
     ft = dy.logistic(sum_item[self.n_out:2*self.n_out])
     ot = dy.logistic(sum_item[2*self.n_out:3*self.n_out])
     c_hat = dy.tanh(sum_item[3*self.n_out:])
     ct = dy.cmult(ft, dy.reshape(c_tilde_t, d=(self.n_out,))) + dy.cmult(it, c_hat)
     ht = dy.cmult(ot, dy.tanh(ct))
     # slide the memories: drop the first row and append the new state as the last row
     hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
     cmt = dy.concatenate([cmtm1[1:], dy.reshape(ct, (1, self.n_out))])
     return hmt, cmt, h_tilde_t
Пример #13
0
def calc_predict_and_activations(wids, tag, words):
    """Run the CNN classifier on one example and print its scores and activations.

    ``wids`` is padded in place with zeros up to ``WIN_SIZE``. Prints the tagged
    sentence, the activation display, the scores with the predicted class, the
    bias and the per-class contribution rows. Returns nothing.
    """
    dy.renew_cg()
    shortfall = WIN_SIZE - len(wids)
    if shortfall > 0:
        wids += [0] * shortfall

    embedded = [dy.lookup(W_emb, wid) for wid in wids]
    cnn_in = dy.concatenate(embedded, d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)

    # index of the strongest position for every filter
    per_position = dy.reshape(cnn_out, (len(wids), FILTER_SIZE)).npvalue()
    activations = per_position.argmax(axis=0)

    pooled = dy.max_dim(cnn_out, d=1)
    pooled = dy.rectify(dy.reshape(pooled, (FILTER_SIZE,)))

    scores = (W_sm * pooled + b_sm).npvalue()
    print ('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print (display_activations(words, activations))
    print ('scores=%s, predict: %d' % (scores, predict))

    features = pooled.npvalue()
    weights = W_sm.npvalue()
    print ('  bias=%s' % b_sm.npvalue())
    contributions = weights * features

    # one report line per class, in class-index order
    templates = (
        ' very bad (%.4f): %s',
        '      bad (%.4f): %s',
        '  neutral (%.4f): %s',
        '     good (%.4f): %s',
        'very good (%.4f): %s',
    )
    for row, template in enumerate(templates):
        print (template % (scores[row], contributions[row]))
Пример #14
0
    def _attend(self, input_vectors, state, prev_att, prev_att_expr, receptive,
                compute_attention):
        """
        Compute (or reuse) attention weights and return the attended sum of the inputs.

        New weights are computed when ``compute_attention`` is truthy or no previous
        weight expression is available; otherwise ``prev_att_expr`` is reused.

        :return: tuple ``(output_vectors, attention_weights)``
        """
        can_reuse = not compute_attention and prev_att_expr is not None
        if can_reuse:
            attention_weights = prev_att_expr
        else:
            w1 = self.att_w1.expr()
            w2 = self.att_w2.expr()
            w3 = self.att_w3.expr()
            w4 = self.att_w4.expr()
            v = self.att_v.expr()

            # features derived from the previous attention via the attention CNN
            n_inputs = len(input_vectors)
            att_cnn = self.cnn_attention.apply(dy.reshape(prev_att, (n_inputs, 1)))
            att_cnn = dy.reshape(att_cnn, (n_inputs, self.config.att_lsa_filters))

            # these two terms do not depend on the position, so build them once
            state_term = w2 * state.h()[-1]
            receptive_term = w4 * receptive

            position_scores = [
                v * dy.tanh(w1 * vec + state_term + w3 * feat + receptive_term)
                for feat, vec in zip(att_cnn, input_vectors)
            ]
            attention_weights = dy.softmax(dy.concatenate(position_scores))

        weighted_inputs = [
            vec * weight for vec, weight in zip(input_vectors, attention_weights)
        ]
        output_vectors = dy.esum(weighted_inputs)
        return output_vectors, attention_weights
    def train(self, trainning_set):
        """
        Run one update per example in ``trainning_set``.

        Each example contributes three kinds of loss terms: a binary log loss on
        the attention against a one-hot trigger vector, a binary log loss on the
        label prediction, and one negative log-probability per pattern in ``rule``.
        The terms are summed, back-propagated, and the computation graph is renewed.
        """
        for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
            n_tokens = len(sentence)
            features = self.encode_sentence(sentence, pos, chars)
            entity_embeds = features[entity]

            # attention supervised with a one-hot vector marking the trigger position
            attention, context = self.self_attend(features)
            trigger_target = dy.vecInput(n_tokens)
            trigger_target.set([1 if i == trigger else 0 for i in range(n_tokens)])
            losses = [dy.binary_log_loss(dy.reshape(attention, (n_tokens,)), trigger_target)]

            # label prediction from the context and the entity embedding
            joint = dy.concatenate([context, entity_embeds])
            hidden = dy.tanh(self.lb.expr() * joint + self.lb_bias.expr())
            prediction = dy.reshape(
                dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
            losses.append(dy.binary_log_loss(prediction, dy.scalarInput(label)))

            # decode the rule step by step, feeding back the gold history
            history = [0]
            for pattern in rule:
                probs = self.decoder(features, history)
                losses.append(-dy.log(dy.pick(probs, pattern)))
                history.append(pattern)

            total_loss = dy.esum(losses)
            total_loss.backward()
            self.trainer.update()
            dy.renew_cg()
Пример #16
0
    def decode_loss(self, src_encodings, tgt_seqs):
        """
        Compute the batch-averaged arc + label loss.

        NOTE: sentences should start with an empty token (the root of the dependency tree).

        :param src_encodings: encodings passed on to ``self.cal_scores``
        :param tgt_seqs: (tgt_heads, tgt_labels): list (length=batch_size) of (src_len)
        :return: summed arc and label loss divided by the batch size
        """
        heads, labels = tgt_seqs
        batch_size = len(heads)
        src_len = len(heads[0])
        n_folded = src_len * batch_size

        # gold annotations flattened to one entry per (sentence, position)
        heads_flat = np.array(heads).flatten()
        labels_flat = np.array(labels).flatten()

        s_arc, s_label = self.cal_scores(src_encodings)

        def fold(expr):
            # treat every position as its own batch element
            return dy.reshape(expr, (src_len,), batch_size=n_folded)

        # highest-scoring entry along axis 0 of the arc scores, flattened
        arc_choice = np.argmax(s_arc.npvalue(), axis=0).transpose().flatten()

        picked_scores = [dy.pick_batch(fold(score), arc_choice) for score in s_label]
        label_scores = dy.concatenate(picked_scores, d=0)

        arc_loss = dy.pickneglogsoftmax_batch(fold(s_arc), heads_flat)
        label_loss = dy.pickneglogsoftmax_batch(label_scores, labels_flat)

        return dy.sum_batches(arc_loss + label_loss) / batch_size
    def build_graph(self, x):
        """
        Build the classifier graph: three parallel convolutions, max pooling, and a
        dot-product output layer.

        :param x: expression whose ``dim()`` unpacks as ``((n, d), batch)``
        :return: the logit expression ``dot_product(pool, W) + b``
        """
        branch_keys = (
            ('conv_W_1', 'conv_b_1', 'channel_1'),
            ('conv_W_2', 'conv_b_2', 'channel_2'),
            ('conv_W_3', 'conv_b_3', 'channel_3'),
        )
        branch_params = [
            (dy.parameter(self.params[w_key]), dy.parameter(self.params[b_key]))
            for w_key, b_key, _ in branch_keys
        ]
        W = dy.parameter(self.params['W'])
        b = dy.parameter(self.params['b'])

        (n, d), _ = x.dim()
        x = dy.reshape(x, (1, n, d))

        # 1-D convolution branches
        conv_outputs = [
            dy.tanh(dy.conv2d_bias(x, kernel, bias, (1, 1), is_valid=False))
            for kernel, bias in branch_params
        ]

        # max-pool each branch after reshaping it to (n, channels)
        pooled = [
            dy.max_dim(dy.reshape(conv, (n, self.options[channel_key])))
            for conv, (_, _, channel_key) in zip(conv_outputs, branch_keys)
        ]

        # fully connected classification
        pool = dy.concatenate(pooled, 0)
        return dy.dot_product(pool, W) + b
Пример #18
0
  def calc_loss(self, src, trg, loss_calculator):
    """
    Compute the loss for predicting each token of ``src`` from the tokens before it.

    ``src`` is wrapped into a batch if it is not batched already. Inputs are the
    sentences without their last token, targets the sentences without their first.

    :return: a ``FactoredLossExpr`` holding the summed per-step loss under "mle"
    """
    if not batcher.is_batched(src):
      src = batcher.ListBatch([src])

    # shift the mask together with the tokens
    if src.mask:
      input_mask = batcher.Mask(src.mask.np_arr[:,:-1])
      target_mask = batcher.Mask(src.mask.np_arr[:,1:])
    else:
      input_mask = None
      target_mask = None
    src_inputs = batcher.ListBatch([sent[:-1] for sent in src], mask=input_mask)
    src_targets = batcher.ListBatch([sent[1:] for sent in src], mask=target_mask)

    self.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src_inputs)
    encodings = self.rnn.transduce(embeddings)
    encodings_tensor = encodings.as_tensor()
    ((hidden_dim, seq_len), batch_size) = encodings.dim()

    # fold the time steps into the batch dimension so every step is scored at once
    flat_encodings = dy.reshape(encodings_tensor, (hidden_dim,), batch_size=batch_size * seq_len)
    outputs = self.transform(flat_encodings)

    ref_action = np.asarray([sent.words for sent in src_targets]).reshape((seq_len * batch_size,))
    step_losses = self.scorer.calc_loss(outputs, batcher.mark_as_batch(ref_action))
    step_losses = dy.reshape(step_losses, (seq_len,), batch_size=batch_size)
    if src_targets.mask:
      # multiply by (1 - mask) so masked steps contribute nothing
      keep = dy.inputTensor(1.0-src_targets.mask.np_arr.T, batched=True)
      step_losses = dy.cmult(step_losses, keep)

    model_loss = loss.FactoredLossExpr()
    model_loss.add_loss("mle", dy.sum_elems(step_losses))
    return model_loss
Пример #19
0
def calc_predict_and_activations(wids, tag, words):
    """Run the CNN classifier on one example and print its scores and activations.

    ``wids`` is padded in place with zeros up to ``WIN_SIZE``. Prints the tagged
    sentence, the activation display, the scores with the predicted class, the
    bias and the per-class contribution rows. Returns nothing.
    """
    dy.renew_cg()
    missing = WIN_SIZE - len(wids)
    if missing > 0:
        wids += [0] * missing

    word_vectors = [dy.lookup(W_emb, wid) for wid in wids]
    cnn_in = dy.concatenate(word_vectors, d=1)
    cnn_out = dy.conv2d_bias(cnn_in,
                             W_cnn,
                             b_cnn,
                             stride=(1, 1),
                             is_valid=False)

    # index of the strongest position for every filter
    filter_values = dy.reshape(cnn_out, (len(wids), FILTER_SIZE)).npvalue()
    activations = filter_values.argmax(axis=0)

    pooled = dy.reshape(dy.max_dim(cnn_out, d=1), (FILTER_SIZE, ))
    pooled = dy.rectify(pooled)

    scores = (W_sm * pooled + b_sm).npvalue()
    print('%d ||| %s' % (tag, ' '.join(words)))
    predict = np.argmax(scores)
    print(display_activations(words, activations))
    print('scores=%s, predict: %d' % (scores, predict))

    features = pooled.npvalue()
    weights = W_sm.npvalue()
    print('  bias=%s' % b_sm.npvalue())
    contributions = weights * features

    # one report line per class, in class-index order
    line_formats = (
        ' very bad (%.4f): %s',
        '      bad (%.4f): %s',
        '  neutral (%.4f): %s',
        '     good (%.4f): %s',
        'very good (%.4f): %s',
    )
    for class_idx, line_format in enumerate(line_formats):
        print(line_format % (scores[class_idx], contributions[class_idx]))
Пример #20
0
 def shape_projection(self, x, batch_size):
     """
     Reshape a projection into per-head form.

     :param x: expression whose batch dimension (``x.dim()[1]``) is the total
         number of words across the batch
     :param batch_size: number of sentences the words are split into
     :return: expression of shape ``(seq_len, self.dim_per_head)`` with batch size
         ``batch_size * self.head_count``
     """
     total_words = x.dim()[1]
     # Floor division: seq_len is used as a tensor dimension, so it has to be an
     # int (true division would make it a float on Python 3).
     seq_len = total_words // batch_size
     out = dy.reshape(x, (self.model_dim, seq_len), batch_size=batch_size)
     out = dy.transpose(out)
     return dy.reshape(out, (seq_len, self.dim_per_head),
                       batch_size=batch_size * self.head_count)
Пример #21
0
 def transduce(
     self, src: expression_seqs.ExpressionSequence
 ) -> expression_seqs.ExpressionSequence:
     """
     Apply ``self.transform`` to the sequence tensor, optionally downsampling first.

     With ``downsample_by > 1`` the tensor is reshaped so that ``downsample_by``
     consecutive steps are merged into one wider step, and the mask is subsampled
     to match.

     Args:
       src: input sequence

     Returns:
       The transformed sequence; its last element is stored in ``self._final_states``.

     Raises:
       ValueError: if downsampling is requested and the sequence length is not a
         multiple of ``downsample_by``.
     """
     src_tensor = src.as_tensor()
     out_mask = src.mask
     if self.downsample_by > 1:
         assert len(src_tensor.dim()[0])==2, \
           f"Downsampling only supported for tensors of order two. Found dims {src_tensor.dim()}"
         (hidden_dim, seq_len), batch_size = src_tensor.dim()
         if seq_len % self.downsample_by != 0:
             raise ValueError(
                 "For downsampling, sequence lengths must be multiples of the total reduce factor. "
                 "Configure batcher accordingly.")
         src_tensor = dy.reshape(src_tensor,
                                 (hidden_dim * self.downsample_by,
                                  seq_len // self.downsample_by),
                                 batch_size=batch_size)
         if out_mask:
             out_mask = out_mask.lin_subsampled(
                 reduce_factor=self.downsample_by)
     output = self.transform.transform(src_tensor)
     if self.downsample_by == 1:
         # Compare the dims themselves. The previous check compared an int
         # (len of the dim tuple) with the dim tuple, which is always unequal,
         # so the reshape ran even when the dims already matched.
         if output.dim() != src_tensor.dim():  # can happen with seq length 1
             output = dy.reshape(output,
                                 src_tensor.dim()[0],
                                 batch_size=src_tensor.dim()[1])
     output_seq = expression_seqs.ExpressionSequence(expr_tensor=output,
                                                     mask=out_mask)
     self._final_states = [FinalTransducerState(output_seq[-1])]
     return output_seq
Пример #22
0
    def recurrence(self, xt, hmtm1, h_history_tm1, dropout_flag):
        """
        One step of a GRU-style cell whose previous state is enriched by an
        attention summary over the stored hidden states.

        :param xt: input vector at the time step t
        :param hmtm1: hidden memories in previous n_steps steps
        :param h_history_tm1: previous history summary
        :param dropout_flag: when true, dropout is applied to the input weight
            matrices (W_xr, W_xz, W_xh) before they multiply ``xt``
        :return: tuple ``(hmt, h_history_t)``: updated hidden memory and the new
            history summary
        """
        step_scores = [
            dy.dot_product(self.u, dy.tanh(
                self.W_h * hmtm1[i] + self.W_x * xt + self.W_htilde * h_history_tm1))
            for i in range(self.n_steps)
        ]
        # normalize the attention score
        attention = dy.softmax(dy.concatenate(step_scores))
        # history summary over all stored states except the most recent one
        h_history_t = dy.reshape(dy.transpose(attention) * hmtm1[:-1], d=(self.n_out,))
        # most recent state plus the rectified history summary
        h_tilde_t = hmtm1[-1] + dy.rectify(h_history_t)

        def project_input(weight):
            # partial dropout, i.e. dropout over the matrices W_x* only
            if dropout_flag:
                weight = dy.dropout(weight, self.dropout_rate)
            return weight * xt

        rt = dy.logistic(project_input(self.W_xr) + self.W_hr * h_tilde_t + self.br)
        zt = dy.logistic(project_input(self.W_xz) + self.W_hz * h_tilde_t + self.bz)
        ht_hat = dy.tanh(project_input(self.W_xh) + self.W_hh * dy.cmult(rt, h_tilde_t) + self.bh)
        ht = dy.cmult(zt, h_tilde_t) + dy.cmult((1.0 - zt), ht_hat)

        # slide the memory: drop the first row, append the new state as the last row
        hmt = dy.concatenate([hmtm1[1:], dy.reshape(ht, (1, self.n_out))])
        return hmt, h_history_t
Пример #23
0
    def scorer(self, q_d_hists, q_idf, bm25_score, overlap_features, p):
        """
        Makes all the calculations and returns a relevance score

        :param q_d_hists: iterable of histograms, one per query term; each is fed
            through the MLP as a row vector
        :param q_idf: per-term values used to build the gating weights
        :param bm25_score: scalar score, wrapped as a scalar input
        :param overlap_features: extra features concatenated with the gated MLP score
        :param p: probability of keeping a unit active in the dropout on the BM25 feature
        :return: the document score expression
        """
        idf_vec = dy.inputVector(q_idf)
        bm25_score = dy.scalarInput(bm25_score)
        overlap_features = dy.inputVector(overlap_features)
        # Pass each query term representation through the MLP
        term_scores = []
        for hist in q_d_hists:
            q_d_hist = dy.reshape(dy.inputVector(hist), (1, len(hist)))
            hidd_out = dy.rectify(q_d_hist * self.W_1 + self.b_1)
            for i in range(0, self.mlp_layers):
                hidd_out = dy.rectify(hidd_out * self.W_n[i] + self.b_n[i])
            term_scores.append(hidd_out * self.W_last + self.b_last)

        # Term Gating
        gating_weights = idf_vec * self.w_g
        
        bm25_feature = bm25_score * self.W_bm25 + self.b_bm25 
        drop_out =  dy.scalarInput(1)
        # keep-mask scaled by 1/p; p = probability of keeping a unit active
        drop_num = (np.random.rand(1) < p)/p
        drop_out.set(drop_num)
        
        # NOTE(review): bm25_feature is not referenced when doc_score is built below,
        # so it does not reach the returned score -- confirm whether that is intended.
        bm25_feature *= drop_out
        # basic MLP output: term scores weighted by the gating weights
        drmm_score = dy.transpose(dy.concatenate(term_scores)) * dy.reshape(gating_weights, (len(q_idf), 1))
        # extra features layer
        doc_score = dy.transpose(dy.concatenate([drmm_score, overlap_features])) * self.W_scores + self.b_scores
        
        
        return doc_score
Пример #24
0
 def encode(input_, train):
     """Add a leading singleton dimension, run both layers, then drop the leading dimension."""
     expanded_shape = (1,) + tuple(input_.dim()[0])
     hidden = first_layer(dy.reshape(input_, expanded_shape), train)
     hidden = residual(hidden, train)
     # strip the first dimension of whatever shape the layers produced
     return dy.reshape(hidden, hidden.dim()[0][1:])
Пример #25
0
 def norm(x):
     """Layer Norm only handles a vector in dynet so fold extra dims into the batch.

     The input is reshaped to vectors of length ``shape[0]``, normalised with the
     enclosing scope's ``a`` and ``b``, and reshaped back to its original dims.
     """
     shape, batchsz = x.dim()
     first = shape[0]
     # np.prod of an empty tuple is the float 1.0 (1-D input); cast so the
     # batch size passed to reshape is always an int.
     fold = int(np.prod(shape[1:]))
     x = dy.reshape(x, (first, ), batch_size=batchsz * fold)
     x = dy.layer_norm(x, a, b)
     return dy.reshape(x, shape, batch_size=batchsz)
Пример #26
0
def folded_softmax(x, softmax=dy.softmax):
    """Dynet only allows for softmax on matrices.

    Flattens every dimension after the first into one, applies `softmax`
    along dimension 0, then restores the original shape.

    :param x: expression to normalise
    :param softmax: callable invoked as ``softmax(matrix, d=0)``
    :return: expression with the same shape and batch size as `x`
    """
    shape, batchsz = x.dim()
    first = shape[0]
    # np.prod of an empty sequence is the float 1.0; cast so the reshape
    # always receives integer dimensions (relevant for 1-D inputs).
    flat = int(np.prod(shape[1:]))
    x = dy.reshape(x, (first, flat), batch_size=batchsz)
    x = softmax(x, d=0)
    return dy.reshape(x, shape, batch_size=batchsz)
Пример #27
0
 def encode(input_, train):
     """Run `input_` through `first_layer` and then `residual`.

     The input is reshaped to carry an extra leading dimension of size 1
     before the layers are applied; the first dimension of the output is
     removed before returning.
     """
     padded_shape = (1,) + tuple(input_.dim()[0])
     reshaped = dy.reshape(input_, padded_shape)
     encoded = residual(first_layer(reshaped, train), train)
     full_shape = encoded.dim()[0]
     return dy.reshape(encoded, full_shape[1:])
Пример #28
0
 def norm(x):
     """Layer Norm only handles a vector in dynet so fold extra dims into the batch.

     Returns an expression with the same shape and batch size as `x`; the
     gain `a` and bias `b` come from the enclosing scope.
     """
     shape, batchsz = x.dim()
     first = shape[0]
     # int(...) guards against np.prod returning the float 1.0 when there are
     # no trailing dimensions to fold.
     fold = int(np.prod(shape[1:]))
     x = dy.reshape(x, (first,), batch_size=batchsz*fold)
     x = dy.layer_norm(x, a, b)
     return dy.reshape(x, shape, batch_size=batchsz)
Пример #29
0
 def transduce(self, seq: ExpressionSequence) -> ExpressionSequence:
   """Add the child transducer's output to its input, optionally layer-normed.

   When ``self.layer_norm`` is set, the second tensor dimension is folded
   into the batch so that ``dy.layer_norm`` sees plain vectors, and the
   original shape is restored afterwards.
   """
   summed = self.child.transduce(seq).as_tensor() + seq.as_tensor()
   if not self.layer_norm:
     return ExpressionSequence(expr_tensor=summed)
   shape, batch = summed.dim()
   folded = dy.reshape(summed, (shape[0],), batch_size=shape[1]*batch)
   normed = dy.layer_norm(folded, self.ln_g, self.ln_b)
   restored = dy.reshape(normed, shape, batch_size=batch)
   return ExpressionSequence(expr_tensor=restored)
Пример #30
0
def folded_softmax(x, softmax=dy.softmax):
    """Dynet only allows for softmax on matrices.

    All dimensions after the first are collapsed into a single one,
    `softmax` is applied over dimension 0, and the result is reshaped back.
    """
    shape, batchsz = x.dim()
    first = shape[0]
    # Cast: np.prod(()) == 1.0 (float), which is not a valid dimension value.
    flat = int(np.prod(shape[1:]))
    x = dy.reshape(x, (first, flat), batch_size=batchsz)
    x = softmax(x, d=0)
    return dy.reshape(x, shape, batch_size=batchsz)
Пример #31
0
def do_one_batch(X_batch, Z_batch):
    """Compute the rectified similarity between a speech and a VGG embedding.

    The speech input goes through three conv / ReLU / k-max-pooling stages
    and a final 1-max pooling; the VGG input goes through an affine
    embedding. The result is ``rectify(speech_embedding^T * vgg_embedding)``.

    :param X_batch: speech features; needs ``.shape`` and ``.flatten('F')``
        (presumably a numpy array -- confirm against the caller)
    :param Z_batch: VGG features with the same array interface
    :return: the rectified similarity expression
    """
    # Flatten the batch into 1-D vector for workaround
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        X_batch_f = X_batch.flatten('F')
        Z_batch_f = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_batch_f), (nmf, nframes),
                       batch_size=batch_size)
        # The dimension argument must be a tuple; `(nvgg)` is just an int.
        z = dy.reshape(dy.inputVector(Z_batch_f), (nvgg,),
                       batch_size=batch_size)
        scnn.add_input([X_batch[i] for i in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[i] for i in range(X_batch.shape[0])])

    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        # Transpose and add a leading singleton dimension for the convolution.
        x = dy.reshape(dy.transpose(x, [1, 0]),
                       (1, X_batch.shape[1], X_batch.shape[0]))
    print(x.npvalue().shape)

    # Three conv -> ReLU -> k-max pooling stages.
    a_h1 = dy.conv2d_bias(x, w_i, b_i, [1, 1], is_valid=False)
    h1 = dy.rectify(a_h1)
    h1_pool = dy.kmax_pooling(h1, D[1], d=1)

    a_h2 = dy.conv2d_bias(h1_pool, w_h1, b_h1, [1, 1], is_valid=False)
    h2 = dy.rectify(a_h2)
    h2_pool = dy.kmax_pooling(h2, D[2], d=1)

    a_h3 = dy.conv2d_bias(h2_pool, w_h2, b_h2, [1, 1], is_valid=False)
    h3 = dy.rectify(a_h3)
    h3_pool = dy.kmax_pooling(h3, D[3], d=1)

    # Keep only the maximum along dimension 1, then flatten to a vector.
    h4 = dy.kmax_pooling(h3_pool, 1, d=1)
    h4_re = dy.reshape(h4, (J[3], ))
    #print(h4_re.npvalue().shape)
    g = dy.scalarInput(1.)
    zem_sp = dy.weight_norm(h4_re, g)
    #print(zem_sp.npvalue().shape)
    zem_vgg = w_embed * z + b_embed
    #print(zem_vgg.npvalue().shape)

    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())

        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
Пример #32
0
 def forward(self, state):
   """Apply the two linear layers to `state` and return the result transposed.

   `state` is loaded as a tensor and reshaped to a 1x4 row; the first layer
   adds the bias ``b_1`` (also reshaped to 1x4), the second has no bias.
   """
   # State should be a length-four matrix
   state_row = dy.reshape(dy.inputTensor(state), (1, 4))
   projected = state_row * dy.parameter(self.w_1)
   bias_row = dy.reshape(dy.parameter(self.b_1), (1, 4))
   hidden = projected + bias_row
   output = hidden * dy.parameter(self.w_2)

   return dy.transpose(output)
Пример #33
0
    def transduce(self, expr_seq: ExpressionSequence) -> ExpressionSequence:
        """
    Apply multi-head self-attention to the sequence.

    Args:
      expr_seq: expression sequence or list of expression sequences (where each inner list will be concatenated)
    Returns:
      expression sequence carrying the input's mask; ``self._final_states``
      is set from its last element
    """
        Wq, Wk, Wv, Wo = (
            dy.parameter(p) for p in (self.pWq, self.pWk, self.pWv, self.pWo)
        )
        bq, bk, bv, bo = (
            dy.parameter(p) for p in (self.pbq, self.pbk, self.pbv, self.pbo)
        )

        # Start with a [(length, model_size) x batch] tensor
        x = expr_seq.as_transposed_tensor()
        dims, x_batch = x.dim()
        x_len = dims[0]

        # Project to queries, keys and values, then split into heads:
        # [(length, head_dim) x batch * num_heads]
        head_shape = (x_len, self.head_dim)
        head_batch = x_batch * self.num_heads
        q = dy.reshape(bq + x * Wq, head_shape, batch_size=head_batch)
        k = dy.reshape(bk + x * Wk, head_shape, batch_size=head_batch)
        v = dy.reshape(bv + x * Wv, head_shape, batch_size=head_batch)

        # Scaled dot product [(length, length) x batch * num_heads];
        # rows are queries, columns are keys
        attn_score = q * dy.transpose(k) / sqrt(self.head_dim)
        if expr_seq.mask is not None:
            # Repeat the mask once per head and push masked scores far down.
            per_head = np.repeat(expr_seq.mask.np_arr, self.num_heads,
                                 axis=0).transpose()
            attn_score = attn_score + dy.inputTensor(per_head,
                                                     batched=True) * -1e10
        attn_prob = dy.softmax(attn_score, d=1)

        # Reduce using attention and resize to [(length, model_size) x batch]
        merged = dy.reshape(attn_prob * v, (x_len, self.input_dim),
                            batch_size=x_batch)
        # Final transformation
        projected = bo + merged * Wo

        out_seq = ExpressionSequence(expr_transposed_tensor=projected,
                                     mask=expr_seq.mask)
        self._final_states = [FinalTransducerState(out_seq[-1], None)]
        return out_seq
Пример #34
0
 def create_network_return_best(self, x):
     """Return the index of the highest-scoring class for the items in `x`.

     Items are looked up in ``self.corpus``; unknown items map to index
     ``len(self.corpus)``. Their embeddings are averaged and passed through
     a tanh hidden layer and a softmax output layer.
     """
     dy.renew_cg()
     unknown_index = len(self.corpus)
     embedded = [self.lookup[self.corpus.get(token, unknown_index)] for token in x]
     mean_row = dy.reshape(dy.average(embedded), (1, self.dim))
     hidden = dy.tanh((mean_row * self._pW1) + self._pB1)
     logits = (hidden * self._kW1) + self._kB1
     probabilities = dy.softmax(dy.reshape(logits, (self.numClasses,)))
     return np.argmax(probabilities.npvalue())
Пример #35
0
    def parse(self, words, extwords, tags):
        """Run the forward pass and turn arc/relation logits into probabilities.

        Both logit tensors are reshaped so that ``len(words)`` becomes the
        batch dimension before the softmax; the relation logits are
        additionally transposed first.

        :return: tuple ``(arc_probs, rel_probs)`` of expressions
        """
        arc_logits, rel_logits = self.forward(words, extwords, tags, False)
        n_tokens = len(words)

        arc_by_token = dy.reshape(arc_logits, (n_tokens, ), n_tokens)
        arc_probs = dy.softmax(arc_by_token)

        rel_by_token = dy.reshape(rel_logits, (n_tokens, self.rel_size),
                                  n_tokens)
        rel_probs = dy.softmax(dy.transpose(rel_by_token))

        return arc_probs, rel_probs
Пример #36
0
 def __call__(self, x, dropout=False):
   """Return the class logits for input `x`.

   With ``args.conv`` set, `x` is reshaped to 28x28x1 and passed through two
   conv / max-pool / ReLU stages before the fully connected layers.
   Dropout with rate ``DROPOUT_RATE`` is applied to the hidden layer when
   `dropout` is true.
   """
   features = x
   if args.conv:
     image = dy.reshape(features, (28, 28, 1))
     conv1 = dy.conv2d_bias(image, self.F1, self.b1, [1, 1], is_valid=False)
     stage1 = dy.rectify(dy.maxpooling2d(conv1, [2, 2], [2, 2]))
     conv2 = dy.conv2d_bias(stage1, self.F2, self.b2, [1, 1], is_valid=False)
     stage2 = dy.rectify(dy.maxpooling2d(conv2, [2, 2], [2, 2]))  # 7x7x64
     features = dy.reshape(stage2, (7 * 7 * 64,))
   hidden = dy.rectify(self.W1 * features + self.hbias)
   if dropout:
     hidden = dy.dropout(hidden, DROPOUT_RATE)
   return self.W2 * hidden
Пример #37
0
    def word_repr(self, char_seq):
        """Compose a word representation from its character vectors.

        The parameters used depend on the word length; their expressions are
        cached in ``self.param_exprs`` the first time a length is seen.

        :param char_seq: list of character vector expressions
        :return: expression for the composed word vector
        """
        wlen = len(char_seq)
        exprs = self.param_exprs
        if 'rgW%d' % wlen not in exprs:
            # Cache the parameter expressions for this word length.
            for prefix, name in (('rgW', 'reset_gate_W'), ('rgb', 'reset_gate_b'),
                                 ('cW', 'com_W'), ('cb', 'com_b'),
                                 ('ugW', 'update_gate_W'), ('ugb', 'update_gate_b')):
                exprs['%s%d' % (prefix, wlen)] = dy.parameter(self.params[name][wlen-1])

        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(exprs['rgW%d'%wlen] * chars + exprs['rgb%d'%wlen])
        # Candidate combination stacked on top of the raw characters.
        comb = dy.concatenate([dy.tanh(exprs['cW%d'%wlen] * dy.cmult(reset_gate,chars) + exprs['cb%d'%wlen]),chars])
        update_logits = exprs['ugW%d'%wlen] * comb + exprs['ugb%d'%wlen]

        # One softmax per group of (wlen + 1) consecutive logits.
        # `range` instead of `xrange` so this also runs on Python 3.
        group = wlen + 1
        update_gate = dy.transpose(dy.concatenate_cols([dy.softmax(dy.pickrange(update_logits,i*group,(i+1)*group)) for i in range(self.options['ndims'])]))

        # The following implementation of the softmax function is not safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(dy.cmult(update_gate,dy.reshape(comb,(self.options['ndims'],wlen+1))))
        return word
Пример #38
0
 def conv(input_, _=None):
     """Apply every layer in `convs` to `input_` and concatenate the pooled outputs.

     `input_` first gets a leading dimension of size 1; each layer's output
     is reduced with `mot_pool`. The second argument is ignored.
     """
     widened = dy.reshape(input_, (1,) + tuple(input_.dim()[0]))
     return dy.concatenate([mot_pool(layer(widened)) for layer in convs])
Пример #39
0
def unsqueeze(x, dim):
    """Add a dimension of size 1 to `x` at position `dim`.

    A negative `dim` counts from the end of the new shape, so ``-1``
    appends the dimension.
    """
    shape, batchsz = x.dim()
    if dim < 0:
        dim += len(shape) + 1
    # Slicing clamps exactly like list.insert does.
    expanded = tuple(shape[:dim]) + (1,) + tuple(shape[dim:])
    return dy.reshape(x, expanded, batch_size=batchsz)
Пример #40
0
def calc_loss(words, labels, heads):
    """Return the biaffine parser's decoding loss for one sentence.

    The words are embedded, encoded with the forward and backward LSTMs,
    and the concatenated states are handed to ``biaffineParser.decode_loss``
    together with the gold heads and labels.
    """
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, wid) for wid in words]
    forward_states = fwdLSTM.initial_state().transduce(embeddings)
    backward_states = bwdLSTM.initial_state().transduce(reversed(embeddings))

    encodings = []
    # The backward outputs are reversed again to line up with the forward ones.
    for fwd_vec, bwd_vec in zip(forward_states, reversed(backward_states)):
        joined = dy.concatenate([fwd_vec, bwd_vec])
        encodings.append(dy.reshape(joined, (HID_SIZE * 2, 1)))
    return biaffineParser.decode_loss(encodings, ([heads], [labels]))
Пример #41
0
def calc_loss(sents):
    """Compute the batched encoder-decoder loss for source/target pairs.

    :param sents: list of ``(source, target)`` pairs of word-id sequences.
        Source sentences are indexed without padding, so every source
        sentence in the batch must have the same length.
    :return: tuple ``(loss, num_words)``: the loss expression summed over
        time steps and batch elements, and the number of target tokens
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [pair[0] for pair in sents]
    tgt_sents = [pair[1] for pair in sents]

    # One list of word ids per source time step.
    max_src_len = max(len(sent) for sent in src_sents)
    src_cws = [[sent[i] for sent in src_sents] for i in range(max_src_len)]

    # Encode the source and keep the final output.
    init_state_src = LSTM_SRC_BUILDER.initial_state()
    src_output = init_state_src.add_inputs(
        [dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()

    # Decoder inputs: pad short targets with eos_trg and remember which
    # positions are real. Lengths come from the target sentences, not from
    # the (source, target) pairs, whose length is always 2.
    max_tgt_len = max(len(sent) for sent in tgt_sents)
    tgt_cws = []
    masks = []
    num_words = 0
    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    all_losses = []
    # masks[1:] keeps each mask aligned with the word being predicted.
    for next_words, mask in zip(tgt_cws[1:], masks[1:]):
        # Feed the previous word and score the next one.
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = dy.pickneglogsoftmax_batch(s, next_words)
        # Zero out the loss at padded positions.
        mask_expr = dy.reshape(dy.inputVector(mask), (1,), len(sents))
        all_losses.append(loss * mask_expr)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
Пример #42
0
def calc_acc(words, labels, heads):
    """Decode one sentence and score the predictions against the gold parse.

    Returns whatever ``biaffineParser.cal_accuracy`` returns for the
    predicted and gold heads and labels.
    """
    dy.renew_cg()
    embeddings = [dy.lookup(W_emb, wid) for wid in words]
    forward_states = fwdLSTM.initial_state().transduce(embeddings)
    backward_states = bwdLSTM.initial_state().transduce(reversed(embeddings))

    encodings = []
    # Re-reverse the backward outputs so both directions line up per token.
    for fwd_vec, bwd_vec in zip(forward_states, reversed(backward_states)):
        joined = dy.concatenate([fwd_vec, bwd_vec])
        encodings.append(dy.reshape(joined, (HID_SIZE * 2, 1)))

    pred_heads, pred_labels = biaffineParser.decoding(encodings)
    return biaffineParser.cal_accuracy(pred_heads, pred_labels, heads, labels)
Пример #43
0
def batch_matmul(x, y):
    """Matmul between first two layers but the rest are ignored.

    Input: ((X, Y, ..), B) and ((Y, Z, ..), B)
    Output: ((X, Z, ..), B)

    The trailing dimensions of `x` are folded into the batch for the
    multiplication and unfolded afterwards.
    """
    x_shape, batchsz = x.dim()
    x_mat = x_shape[:2]
    sames = x_shape[2:]
    # np.prod of an empty sequence is the float 1.0; cast so the batch size
    # stays an integer when the inputs are plain matrices.
    fold = int(np.prod(sames))
    y_shape, _ = y.dim()
    y_mat = y_shape[:2]

    x = dy.reshape(x, x_mat, batch_size=fold*batchsz)
    y = dy.reshape(y, y_mat, batch_size=fold*batchsz)

    z = x * y
    z = dy.reshape(z, tuple([x_mat[0], y_mat[1]] + list(sames)), batch_size=batchsz)
    return z
Пример #44
0
def squeeze(x, d=-1):
    """Remove size-1 dimensions from `x`.

    With the default ``d=-1`` every dimension of size 1 is dropped;
    otherwise only dimension `d` is removed, and it must have size 1.
    """
    shape, batchsz = x.dim()
    if d != -1:
        assert shape[d] == 1, "Cannot squeeze dimension {} of size {}".format(d, shape[d])
        remaining = list(shape)
        del remaining[d]
        squeezed = tuple(remaining)
    else:
        squeezed = tuple(size for size in shape if size != 1)
    return dy.reshape(x, squeezed, batch_size=batchsz)
Пример #45
0
def calc_scores(wids):
    """Score a sequence of word ids with the CNN classifier.

    Inputs shorter than ``WIN_SIZE`` are padded with id 0. The padding is
    applied to a copy, so the caller's list is left untouched.

    :param wids: sequence of word ids
    :return: expression holding the output scores
    """
    dy.renew_cg()
    if len(wids) < WIN_SIZE:
        wids = list(wids) + [0] * (WIN_SIZE - len(wids))

    # Embeddings are laid out side by side along dimension 1.
    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in wids], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    # Max over dimension 1, then flatten to one value per filter.
    pool_out = dy.max_dim(cnn_out, d=1)
    pool_out = dy.reshape(pool_out, (FILTER_SIZE,))
    pool_out = dy.rectify(pool_out)
    return W_sm * pool_out + b_sm
Пример #46
0
    def __call__(self, query, key, value, mask=None, train=False):
        """Input: ((H, T), B) Output: ((H, T), B)

        Projects query, key and value, splits each into ``self.h`` heads of
        size ``self.d_k``, runs ``self.attn`` and merges the heads back
        before the output projection. Dropout is only passed to the
        attention when `train` is true.
        """
        _, batchsz = query.dim()

        def split_heads(projected):
            # (d_k * h, T) -> (d_k, h, T), then swap the last two axes.
            steps = projected.dim()[0][1]
            by_head = dy.reshape(projected, (self.d_k, self.h, steps),
                                 batch_size=batchsz)
            return transpose(by_head, 1, 2)

        query = split_heads(self.p_Q(query))
        key = split_heads(self.p_K(key))
        value = split_heads(self.p_V(value))

        pdrop = self.pdrop if train else None
        attended = self.attn(query, key, value, mask=mask, dropout=pdrop)

        # Undo the axis swap and merge the heads back into one dimension.
        attended = transpose(attended, 1, 2)
        steps = attended.dim()[0][2]
        merged = dy.reshape(attended, (self.h * self.d_k, steps),
                            batch_size=batchsz)
        return self.p_O(merged)
Пример #47
0
 def evaluate(self, inputs, train=False):
     """
     Apply all MLP layers to concatenated input
     :param inputs: (key, vector) per feature type
     :param train: are we training now?
     :return: output vector of size self.output_dim
     """
     input_keys, inputs = list(map(list, zip(*list(inputs))))
     if self.input_keys:
         # "Got" reports the keys just received, "expected" the remembered ones.
         assert input_keys == self.input_keys, "Got:     %s\nBut expected input keys: %s" % (
             self.input_keys_str(input_keys), self.input_keys_str(self.input_keys))
     else:
         # First call: remember the keys so later calls can be checked.
         self.input_keys = input_keys
     if self.gated:
         gates = self.params.get("gates")
         if gates is None:  # FIXME attention weights should not be just parameters, but based on biaffine product?
             gates = self.params["gates"] = self.model.add_parameters((len(inputs), self.gated),
                                                                      init=dy.UniformInitializer(1))
         input_dims = [i.dim()[0][0] for i in inputs]
         max_dim = max(input_dims)
         x = dy.concatenate_cols([dy.concatenate([i, dy.zeroes(max_dim - d)])  # Pad with zeros to get uniform dim
                                  if d < max_dim else i for i, d in zip(inputs, input_dims)]) * gates
         # Possibly multiple "attention heads" -- concatenate outputs to one vector
         inputs = [dy.reshape(x, (x.dim()[0][0] * x.dim()[0][1],))]
     x = dy.concatenate(inputs)
     assert len(x.dim()[0]) == 1, "Input should be a vector, but has dimension " + str(x.dim()[0])
     dim = x.dim()[0][0]
     if self.input_dim:
         assert dim == self.input_dim, "Input dim mismatch: %d != %d" % (dim, self.input_dim)
     else:
         # Parameters are created lazily, once the input size is known.
         self.init_params(dim)
     self.config.print(self, level=4)
     if self.total_layers:
         if self.weights is None:
             self.weights = [[self.params[prefix + str(i)] for prefix in ("W", "b")]
                             for i in range(self.total_layers)]
             if self.weights[0][0].dim()[0][1] < dim:  # number of columns in W0
                 self.weights[0][0] = dy.concatenate_cols([self.weights[0][0], self.params["W0+"]])
         for i, (W, b) in enumerate(self.weights):
             self.config.print(lambda: x.npvalue().tolist(), level=4)
             try:
                 if train and self.dropout:
                     x = dy.dropout(x, self.dropout)
                 x = self.activation()(W * x + b)
             except ValueError as e:
                 raise ValueError("Error in evaluating layer %d of %d" % (i + 1, self.total_layers)) from e
     self.config.print(lambda: x.npvalue().tolist(), level=4)
     return x
Пример #48
0
    def BuildLMGraph(self, sents):
        """Build the batched language-model loss for `sents`.

        :param sents: non-empty list of sequences whose items are keys of
            ``vocab.w2i``; shorter sequences are padded with ``<s>`` and the
            padded positions are masked out of the loss
        :return: tuple ``(loss, tot_chars)``: the loss expression summed over
            steps and batch elements, and the number of unpadded items
        """
        dy.renew_cg()
        # initialize the RNN
        init_state = self.builder.initial_state()
        # parameters -> expressions
        R = dy.parameter(self.R)
        bias = dy.parameter(self.bias)

        S = vocab.w2i["<s>"]
        # get the cids and masks for each step
        tot_chars = 0
        cids = []
        masks = []

        # Use the longest sentence so the batch need not be sorted by length.
        max_len = max(len(sent) for sent in sents)
        for i in range(max_len):
            cids.append([(vocab.w2i[sent[i]] if len(sent) > i else S) for sent in sents])
            mask = [(1 if len(sent)>i else 0) for sent in sents]
            masks.append(mask)
            tot_chars += sum(mask)

        # start the rnn with the first item of every sentence
        init_ids = cids[0]
        s = init_state.add_input(dy.lookup_batch(self.lookup, init_ids))

        losses = []

        # feed char vectors into the RNN and predict the next char
        for cid, mask in zip(cids[1:], masks[1:]):
            score = dy.affine_transform([bias, R, s.output()])
            loss = dy.pickneglogsoftmax_batch(score, cid)
            # mask the loss if at least one sentence is shorter
            if 0 in mask:
                mask_expr = dy.inputVector(mask)
                mask_expr = dy.reshape(mask_expr, (1,), len(sents))
                loss = loss * mask_expr

            losses.append(loss)
            # update the state of the RNN
            cemb = dy.lookup_batch(self.lookup, cid)
            s = s.add_input(cemb)

        return dy.sum_batches(dy.esum(losses)), tot_chars
Пример #49
0
 def __call__(self, inputs, dropout=False):
     """Return the output-layer scores (no softmax) for `inputs`.

     `inputs` is loaded as a tensor and passed through two
     conv / max-pool / ReLU stages and one hidden layer; dropout with rate
     ``DROPOUT_RATE`` is applied to the hidden layer when `dropout` is true.
     """
     image = dy.inputTensor(inputs)

     # First conv / max-pool / ReLU stage.
     conv1, b1 = dy.parameter(self.pConv1), dy.parameter(self.pB1)
     stage1 = dy.conv2d_bias(image, conv1, b1, [1, 1], is_valid=False)
     stage1 = dy.rectify(dy.maxpooling2d(stage1, [2, 2], [2, 2]))

     # Second conv / max-pool / ReLU stage.
     conv2, b2 = dy.parameter(self.pConv2), dy.parameter(self.pB2)
     stage2 = dy.conv2d_bias(stage1, conv2, b2, [1, 1], is_valid=False)
     stage2 = dy.rectify(dy.maxpooling2d(stage2, [2, 2], [2, 2]))

     # Flatten to a column for the fully connected layers.
     flat = dy.reshape(stage2, (7*7*64, 1))
     w1, b3 = dy.parameter(self.pW1), dy.parameter(self.pB3)
     hidden = dy.rectify(w1*flat+b3)
     if dropout:
         hidden = dy.dropout(hidden, DROPOUT_RATE)
     w2 = dy.parameter(self.pW2)
     return w2*hidden
Пример #50
0
def calc_lm_loss(sents):
    """Compute the batched language-model loss for `sents`.

    :param sents: non-empty list of word-id sequences; shorter sequences are
        padded with ``S`` and the padded positions are masked out of the loss
    :return: tuple ``(loss, tot_words)``: the loss expression summed over
        steps and batch elements, and the number of unpadded words
    """
    dy.renew_cg()

    # initialize the RNN
    f_init = RNN.initial_state()

    # get the wids and masks for each step
    tot_words = 0
    wids = []
    masks = []
    # Use the longest sentence so the batch need not be sorted by length.
    max_len = max(len(sent) for sent in sents)
    for i in range(max_len):
        wids.append([(sent[i] if len(sent) > i else S) for sent in sents])
        mask = [(1 if len(sent) > i else 0) for sent in sents]
        masks.append(mask)
        tot_words += sum(mask)

    # start the rnn by inputting "<s>"
    init_ids = [S] * len(sents)
    s = f_init.add_input(dy.lookup_batch(WORDS_LOOKUP, init_ids))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        # calculate the softmax and loss
        score = dy.affine_transform([b_exp, W_exp, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # mask the loss if at least one sentence is shorter
        if 0 in mask:
            mask_expr = dy.inputVector(mask)
            mask_expr = dy.reshape(mask_expr, (1,), len(sents))
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        wemb = dy.lookup_batch(WORDS_LOOKUP, wid)
        s = s.add_input(wemb)

    return dy.sum_batches(dy.esum(losses)), tot_words
Пример #51
0
def mot_pool(x, strides=(1, 1, 1, 1)):
    """Max-pool `x` over its second dimension and return a flat vector.

    `x` must have exactly three dimensions; the result has the size of
    the third one.
    """
    # dy.max_dim(x, d=0) is currently slow (see https://github.com/clab/dynet/issues/1011)
    # So we do the max using max pooling instead.
    shape, _ = x.dim()
    _, seq_len, cmotsz = shape
    window = [1, seq_len, 1]
    return dy.reshape(dy.maxpooling2d(x, window, strides), (cmotsz,))
Пример #52
0
def calc_loss(sents):
    """Compute the batched attentional encoder-decoder loss.

    :param sents: list of ``(source, target)`` pairs of word-id sequences.
        Source sentences are indexed without padding, so every source
        sentence in the batch must have the same length.
    :return: tuple ``(loss, num_words)``: the loss expression summed over
        time steps and batch elements, and the number of target tokens
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [pair[0] for pair in sents]
    tgt_sents = [pair[1] for pair in sents]

    # One list of word ids per source time step.
    max_src_len = max(len(sent) for sent in src_sents)
    src_cws = [[sent[i] for sent in src_sents] for i in range(max_src_len)]

    # Encoder: concatenate the outputs of each state pair per time step.
    src_outputs = [dy.concatenate([x.output(), y.output()]) for x, y in
                   LSTM_SRC.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])]
    src_output = src_outputs[-1]

    # Part of the attention that does not depend on the decoder state.
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    # Decoder inputs: pad short targets with eos_trg and remember which
    # positions are real. Lengths come from the target sentences, not from
    # the (source, target) pairs, whose length is always 2.
    max_tgt_len = max(len(sent) for sent in tgt_sents)
    tgt_cws = []
    masks = []
    num_words = 0
    for i in range(max_tgt_len):
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    all_losses = []
    # masks[1:] keeps each mask aligned with the word being predicted.
    for next_words, mask in zip(tgt_cws[1:], masks[1:]):
        # Feed the previous word, attend over the source, score the next word.
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = dy.pickneglogsoftmax_batch(s, next_words)
        # Zero out the loss at padded positions.
        mask_expr = dy.reshape(dy.inputVector(mask), (1,), len(sents))
        all_losses.append(loss * mask_expr)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words